diff --git a/postgres-appliance/bootstrap/clone_with_wale.py b/postgres-appliance/bootstrap/clone_with_wale.py
index e8d31962d..10e97f043 100755
--- a/postgres-appliance/bootstrap/clone_with_wale.py
+++ b/postgres-appliance/bootstrap/clone_with_wale.py
@@ -20,16 +20,23 @@ def read_configuration():
     parser = argparse.ArgumentParser(description="Script to clone from S3 with support for point-in-time-recovery")
-    parser.add_argument('--scope', required=True, help='target cluster name')
-    parser.add_argument('--datadir', required=True, help='target cluster postgres data directory')
+    parser.add_argument('--scope', required=True,
+                        help='target cluster name')
+    parser.add_argument('--datadir', required=True,
+                        help='target cluster postgres data directory')
     parser.add_argument('--recovery-target-time',
                         help='the timestamp up to which recovery will proceed (including time zone)',
                         dest='recovery_target_time_string')
-    parser.add_argument('--dry-run', action='store_true', help='find a matching backup and build the wal-e '
+    parser.add_argument('--dry-run', action='store_true',
+                        help='find a matching backup and build the wal-e '
                              'command to fetch that backup without running it')
+    parser.add_argument('--recovery-target-timeline',
+                        help='the timeline up to which recovery will proceed. Leave empty for latest.',
+                        dest='recovery_target_timeline',
+                        type=lambda timeline_id: int(timeline_id, 16))
     args = parser.parse_args()
-    options = namedtuple('Options', 'name datadir recovery_target_time dry_run')
+    options = namedtuple('Options', 'name datadir recovery_target_time recovery_target_timeline dry_run')
     if args.recovery_target_time_string:
         recovery_target_time = parse(args.recovery_target_time_string)
         if recovery_target_time.tzinfo is None:
@@ -37,8 +44,21 @@ def read_configuration():
     else:
         recovery_target_time = None
-    return options(args.scope, args.datadir, recovery_target_time, args.dry_run)
+    if args.recovery_target_timeline is None:
+        recovery_target_timeline = get_latest_timeline()
+    else:
+        recovery_target_timeline = args.recovery_target_timeline
+
+    return options(args.scope, args.datadir, recovery_target_time, recovery_target_timeline, args.dry_run)
+def get_latest_timeline():
+    env = os.environ.copy()
+    backup_list = list_backups(env)
+    latest_timeline_id = int("00000000", 16)
+    for backup in backup_list:
+        if int(backup["name"][5:13], 16) > latest_timeline_id:
+            latest_timeline_id = int(backup["name"][5:13], 16)
+    return latest_timeline_id
 
 
 def build_wale_command(command, datadir=None, backup=None):
     cmd = ['wal-g' if os.getenv('USE_WALG_RESTORE') == 'true' else 'wal-e'] + [command]
@@ -65,16 +85,18 @@ def fix_output(output):
         yield '\t'.join(line.split())
 
 
-def choose_backup(backup_list, recovery_target_time):
+def choose_backup(backup_list, recovery_target_time, recovery_target_timeline):
     """ pick up the latest backup file starting before time recovery_target_time"""
     match_timestamp = match = None
     for backup in backup_list:
-        last_modified = parse(backup['last_modified'])
-        if last_modified < recovery_target_time:
-            if match is None or last_modified > match_timestamp:
-                match = backup
-                match_timestamp = last_modified
+        timeline_id = int(backup["name"][5:13], 16)
+        if timeline_id == recovery_target_timeline:
+            last_modified = parse(backup['last_modified'])
+            if last_modified < recovery_target_time:
+                if match is None or last_modified > match_timestamp:
+                    match = backup
+                    match_timestamp = last_modified
     if match is not None:
         return match['name']
 
 
@@ -140,7 +162,7 @@ def get_wale_environments(env):
             yield name, orig_value
 
 
-def find_backup(recovery_target_time, env):
+def find_backup(recovery_target_time, recovery_target_timeline, env):
     old_value = None
     for name, value in get_wale_environments(env):
         logger.info('Trying %s for clone', value)
@@ -150,11 +172,11 @@
         backup_list = list_backups(env)
         if backup_list:
             if recovery_target_time:
-                backup = choose_backup(backup_list, recovery_target_time)
+                backup = choose_backup(backup_list, recovery_target_time, recovery_target_timeline)
                 if backup:
                     return backup, (name if value != old_value else None)
             else:  # We assume that the LATEST backup will be for the biggest postgres version!
                 return 'LATEST', (name if value != old_value else None)
     if recovery_target_time:
         raise Exception('Could not find any backups prior to the point in time {0}'.format(recovery_target_time))
     raise Exception('Could not find any backups')
@@ -163,7 +185,7 @@
 def run_clone_from_s3(options):
     env = os.environ.copy()
 
-    backup_name, update_envdir = find_backup(options.recovery_target_time, env)
+    backup_name, update_envdir = find_backup(options.recovery_target_time, options.recovery_target_timeline, env)
 
     backup_fetch_cmd = build_wale_command('backup-fetch', options.datadir, backup_name)
     logger.info("cloning cluster %s using %s", options.name, ' '.join(backup_fetch_cmd))
diff --git a/postgres-appliance/scripts/configure_spilo.py b/postgres-appliance/scripts/configure_spilo.py
index 2102d2eed..abb07a38f 100755
--- a/postgres-appliance/scripts/configure_spilo.py
+++ b/postgres-appliance/scripts/configure_spilo.py
@@ -226,7 +226,7 @@ def deep_update(a, b):
         method: clone_with_wale
         clone_with_wale:
           command: envdir "{{CLONE_WALE_ENV_DIR}}" python3 /scripts/clone_with_wale.py
-            --recovery-target-time="{{CLONE_TARGET_TIME}}"
+            --recovery-target-time="{{CLONE_TARGET_TIME}}" --recovery-target-timeline="{{CLONE_TARGET_TIMELINE}}"
         recovery_conf:
           restore_command: envdir "{{CLONE_WALE_ENV_DIR}}" timeout "{{WAL_RESTORE_TIMEOUT}}" /scripts/restore_command.sh "%f" "%p"