Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion myhoard/basebackup_operation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2019 Aiven, Helsinki, Finland. https://aiven.io/
from contextlib import suppress
from myhoard.errors import XtraBackupError
from myhoard.util import get_tables_to_optimise, mysql_cursor
from rohmu.util import increase_pipe_capacity, set_stream_nonblocking

import base64
Expand Down Expand Up @@ -48,6 +49,7 @@ def __init__(
self.data_directory_filtered_size = None
self.data_directory_size_end = None
self.data_directory_size_start = None
self._error_lines: list[str] = []
self.encryption_algorithm = encryption_algorithm
self.encryption_key = encryption_key
self.log = logging.getLogger(self.__class__.__name__)
Expand Down Expand Up @@ -108,7 +110,7 @@ def create_backup(self):
"--target-dir",
self.temp_dir,
]

self._error_lines = []
with self.stats.timing_manager("myhoard.basebackup.xtrabackup_backup"):
with subprocess.Popen(
command_line, bufsize=0, stdout=subprocess.PIPE, stderr=subprocess.PIPE
Expand All @@ -119,6 +121,14 @@ def create_backup(self):
self.data_directory_size_end, self.data_directory_filtered_size = self._get_data_directory_size()
self._update_progress(estimated_progress=100)

def _process_error_lines(self):
    """Run ``OPTIMIZE TABLE`` for every table xtrabackup flagged as unsafe.

    ``get_tables_to_optimise`` parses the collected ``self._error_lines``
    into ``db.table`` names.  Those names originate from untrusted process
    output, so each identifier part is backtick-quoted (with embedded
    backticks doubled — MySQL's identifier-escaping rule) before being
    interpolated into the SQL statement.
    """
    with mysql_cursor(**self.mysql_client_params) as cursor:
        for table_to_optimise in get_tables_to_optimise(self._error_lines, log=self.log):
            # NOTE(review): assumes neither the db nor the table name
            # contains a literal dot — the name was composed upstream by
            # replacing "/" with "." — TODO confirm.
            quoted_name = ".".join(
                "`" + part.replace("`", "``") + "`" for part in table_to_optimise.split(".")
            )
            cursor.execute(f"OPTIMIZE TABLE {quoted_name};")
            self.stats.increase(metric="myhoard.basebackup.optimize_table")
        cursor.execute("COMMIT;")

def _get_data_directory_size(self):
total_filtered_size = 0
total_size = 0
Expand Down Expand Up @@ -192,6 +202,7 @@ def _process_input_output(self):
with suppress(Exception):
self.proc.stderr.close()
self.log.info("Joining output reader thread...")
self._process_error_lines()
# We've closed stdout so the thread is bound to exit without any other calls
if reader_thread:
reader_thread.join()
Expand Down Expand Up @@ -226,6 +237,7 @@ def _process_output_line(self, line):
):
if any(key in line for key in ["[ERROR]", " Failed ", " failed ", " Invalid "]):
self.log.error("xtrabackup: %r", line)
self._error_lines.append(line)
else:
self.log.info("xtrabackup: %r", line)

Expand Down
25 changes: 24 additions & 1 deletion myhoard/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from cryptography.hazmat.primitives.hashes import SHA1
from logging import Logger
from math import log10
from typing import List, Optional, Tuple
from typing import Iterator, List, Optional, Tuple

import collections
import contextlib
Expand All @@ -25,6 +25,29 @@
ERR_TIMEOUT = 2013


def get_tables_to_optimise(error_lines: list[str], *, log: Logger) -> list[str]:
    """Extract table names that xtrabackup asks to be optimised from its error output.

    xtrabackup emits a fixed sequence of ``[Xtrabackup]`` error lines when it
    finds tables with row versions due to INSTANT ADD/DROP columns: a trigger
    line, a documentation pointer, a ``Tables found:`` header, the
    ``db/table`` names, and a closing ``Please run OPTIMIZE TABLE`` line.

    Returns the affected tables as ``db.table`` strings.  A list (not a lazy
    iterator) is returned so the result can safely be iterated more than once.
    An empty list is returned when the trigger line is absent; when the
    trigger is present but the surrounding structure is not as expected, an
    error is logged and an empty list is returned.
    """
    xtrabackup_lines = [line.split("[Xtrabackup] ", 1)[1] for line in error_lines if "[Xtrabackup] " in line]
    try:
        trigger_index = xtrabackup_lines.index("This feature is not stable and will cause backup corruption.")
    except ValueError:
        return []  # did not find the trigger line
    remaining = xtrabackup_lines[trigger_index + 1 :]
    if (
        len(remaining) < 3
        or remaining[0] != "Please check https://docs.percona.com/percona-xtrabackup/8.0/em/instant.html for more details."
        or remaining[1] != "Tables found:"
    ):
        log.error("Unexpected error structure. Cannot identify tables to optimise.")
        return []
    tables: list[str] = []
    for line in remaining[2:]:
        if line.startswith("Please run OPTIMIZE TABLE"):
            break
        tables.append(line.replace("/", "."))
    return tables
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The output of this function is fed directly to the DB via string interpolation. I think we'd need a bit of validation here — check for valid chars only, or perform some escaping so that the wrong data cannot enter the DB.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also: I'd return a better object, e.g. a 2-tuple with (db, table), then I'd compose it when creating the query, so that we can validate the names separately and we don't have to deal with string content if we want to perform some other manipulation in the future.



@contextlib.contextmanager
def atomic_create_file(file_path, *, binary=False, perm=None, uidgid=None, inherit_owner_from_parent=False):
"""Open a temporary file for writing, rename to final name when done"""
Expand Down
74 changes: 74 additions & 0 deletions test/test_error_line_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from myhoard.util import get_tables_to_optimise
from unittest.mock import call, Mock

# pylint: disable=line-too-long


def test_optimise_errors():
    """Happy path: trigger, preamble, table list and terminator are all present."""
    messages = [
        ("971687", "Found tables with row versions due to INSTANT ADD/DROP columns"),
        ("972112", "This feature is not stable and will cause backup corruption."),
        ("972355", "Please check https://docs.percona.com/percona-xtrabackup/8.0/em/instant.html for more details."),
        ("972573", "Tables found:"),
        ("972848", "defaultdb/action_events"),
        ("973063", "defaultdb/nova_notifications"),
        ("973326", "defaultdb/banners"),
        ("973540", "defaultdb/backend_users"),
        ("973756", "defaultdb/courses"),
        ("974053", "defaultdb/modules"),
        ("974409", "defaultdb/users"),
        ("974650", "defaultdb/lessons"),
        ("974858", "Please run OPTIMIZE TABLE or ALTER TABLE ALGORITHM=COPY on all listed tables to fix this issue."),
    ]
    error_lines = [
        f"2022-11-24T01:59:46.{micros}-00:00 0 [ERROR] [MY-011825] [Xtrabackup] {text}"
        for micros, text in messages
    ]
    log = Mock()
    expected = {
        f"defaultdb.{table}"
        for table in (
            "action_events",
            "nova_notifications",
            "banners",
            "backend_users",
            "courses",
            "modules",
            "users",
            "lessons",
        )
    }
    assert set(get_tables_to_optimise(error_lines, log=log)) == expected
    log.error.assert_not_called()


def test_optimise_errors_missing_expected_structure():
    """The 'Tables found:' header is missing, so no tables are reported and an error is logged."""
    messages = [
        ("971687", "Found tables with row versions due to INSTANT ADD/DROP columns"),
        ("972112", "This feature is not stable and will cause backup corruption."),
        ("972355", "Please check https://docs.percona.com/percona-xtrabackup/8.0/em/instant.html for more details."),
        ("972848", "defaultdb/action_events"),
        ("973063", "defaultdb/nova_notifications"),
        ("973326", "defaultdb/banners"),
        ("973540", "defaultdb/backend_users"),
        ("973756", "defaultdb/courses"),
        ("974053", "defaultdb/modules"),
        ("974409", "defaultdb/users"),
        ("974650", "defaultdb/lessons"),
        ("974858", "Please run OPTIMIZE TABLE or ALTER TABLE ALGORITHM=COPY on all listed tables to fix this issue."),
    ]
    error_lines = [
        f"2022-11-24T01:59:46.{micros}-00:00 0 [ERROR] [MY-011825] [Xtrabackup] {text}"
        for micros, text in messages
    ]
    log = Mock()
    assert set(get_tables_to_optimise(error_lines, log=log)) == set()
    log.error.assert_called_once_with("Unexpected error structure. Cannot identify tables to optimise.")


def test_other_errors():
    """Without the trigger line, nothing is returned and no error is logged."""
    messages = [
        ("972573", "Tables found:"),
        ("972848", "defaultdb/action_events"),
        ("973063", "defaultdb/nova_notifications"),
        ("973326", "defaultdb/banners"),
        ("973540", "defaultdb/backend_users"),
        ("973756", "defaultdb/courses"),
        ("974053", "defaultdb/modules"),
        ("974409", "defaultdb/users"),
        ("974650", "defaultdb/lessons"),
    ]
    error_lines = [
        f"2022-11-24T01:59:46.{micros}-00:00 0 [ERROR] [MY-011825] [Xtrabackup] {text}"
        for micros, text in messages
    ]
    log = Mock()
    assert set(get_tables_to_optimise(error_lines, log=log)) == set()
    log.error.assert_not_called()