Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ Usage
--port PORT Port of the database
--dry-run Don't commit changes made on the database
--dump-file DUMP_FILE
Create a database dump file with the given name
Create a database dump file with the given name
--dump-options DUMP_OPTIONS
Options to pass to the pg_dump command
--init-sql INIT_SQL SQL to run before starting anonymization
--parallel Data anonymization is done in parallel
Expand Down
4 changes: 3 additions & 1 deletion pganonymize/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def get_arg_parser():
parser.add_argument('--dry-run', action='store_true', help='Don\'t commit changes made on the database',
default=False)
parser.add_argument('--dump-file', help='Create a database dump file with the given name')
parser.add_argument('--dump-options', help='Options to pass to the pg_dump command',
default='--format custom --compress 9')
parser.add_argument('--init-sql', help='SQL to run before starting anonymization', default=False)
parser.add_argument(
'--parallel',
Expand Down Expand Up @@ -101,4 +103,4 @@ def main(args):
logging.info('Anonymization took {:.2f}s'.format(end_time - start_time))

if args.dump_file:
create_database_dump(args.dump_file, pg_args)
create_database_dump(args.dump_file, pg_args, args.dump_options)
11 changes: 6 additions & 5 deletions pganonymize/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def truncate_tables(connection):
cursor.close()


def create_database_dump(filename, db_args):
def create_database_dump(filename, db_args, dump_args):
"""
Create a dump file from the current database.
Expand All @@ -298,11 +298,12 @@ def create_database_dump(filename, db_args):
env_vars = ''
if db_args.get('password'):
env_vars += 'PGPASSWORD={password}'.format(password=db_args['password'])
arguments = '-d {dbname} -U {user} -h {host} -p {port}'.format(**db_args)
cmd = '{env_vars}pg_dump -Fc -Z 9 {args} -f {filename}'.format(
arguments = '--dbname {dbname} --username {user} --host {host} --port {port}'.format(**db_args)
cmd = '{env_vars}pg_dump {dump_args} {db_args} --file {filename}'.format(
env_vars='{} '.format(env_vars) if env_vars else '',
args=arguments,
filename=filename
dump_args=dump_args,
db_args=arguments,
filename=filename,
)
logging.info('Creating database dump file "%s"', filename)
subprocess.call(cmd, shell=True)
Expand Down
14 changes: 7 additions & 7 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class TestCli(object):
@pytest.mark.parametrize('cli_args, expected, expected_executes, commit_calls, call_dump', [
['--host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa
Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root',
password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file=None, init_sql="set work_mem='1GB'", parallel=False), # noqa
password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file=None, dump_options='--format custom --compress 9', init_sql="set work_mem='1GB'", parallel=False), # noqa
[call("set work_mem='1GB'"),
call('TRUNCATE TABLE "django_session"'),
call('SELECT COUNT(*) FROM "auth_user"'),
Expand All @@ -32,7 +32,7 @@ class TestCli(object):
],
['--dry-run --host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa
Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root',
password='my-cool-password', host='localhost', port='5432', dry_run=True, dump_file=None, init_sql="set work_mem='1GB'", parallel=False), # noqa
password='my-cool-password', host='localhost', port='5432', dry_run=True, dump_file=None, dump_options='--format custom --compress 9', init_sql="set work_mem='1GB'", parallel=False), # noqa
[call("set work_mem='1GB'"),
call('TRUNCATE TABLE "django_session"'),
call('SELECT "id", "first_name", "last_name", "email" FROM "auth_user" LIMIT 100'),
Expand All @@ -42,9 +42,9 @@ class TestCli(object):
],
0, []
],
['--dump-file ./dump.sql --host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa
['--dump-file ./dump.sql --dump-options "--format plain" --host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa
Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root',
password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file='./dump.sql', init_sql="set work_mem='1GB'", parallel=False), # noqa
password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file='./dump.sql', dump_options='--format plain', init_sql="set work_mem='1GB'", parallel=False), # noqa
[
call("set work_mem='1GB'"),
call('TRUNCATE TABLE "django_session"'),
Expand All @@ -56,14 +56,14 @@ class TestCli(object):
call('UPDATE "auth_user" t SET "first_name" = s."first_name", "last_name" = s."last_name", "email" = s."email" FROM "tmp_auth_user" s WHERE t."id" = s."id"') # noqa
],
1,
[call('PGPASSWORD=my-cool-password pg_dump -Fc -Z 9 -d db -U root -h localhost -p 5432 -f ./dump.sql', shell=True)] # noqa
[call('PGPASSWORD=my-cool-password pg_dump --format plain --dbname db --username root --host localhost --port 5432 --file ./dump.sql', shell=True)] # noqa
],
['--list-providers --parallel',
Namespace(verbose=None, list_providers=True, schema='schema.yml', dbname=None, user=None,
password='', host='localhost', port='5432', dry_run=False, dump_file=None, init_sql=False, parallel=True), # noqa
password='', host='localhost', port='5432', dry_run=False, dump_file=None, dump_options='--format custom --compress 9', init_sql=False, parallel=True), # noqa
[], 0, []
]
],
])
def test_cli_args(self, subprocess, patched_connect, quote_ident, cli_args, expected, expected_executes, commit_calls, call_dump): # noqa
arg_parser = get_arg_parser()
Expand Down
29 changes: 20 additions & 9 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,22 +236,33 @@ class TestCreateDatabaseDump(object):

@patch('pganonymize.utils.subprocess.call')
def test(self, mock_call):
create_database_dump(
'/tmp/dump.gz',
{'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432},
)
filename = '/tmp/dump.gz'
db_args = {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432}
dump_args = '--format custom --compress 9'
create_database_dump(filename, db_args, dump_args)
mock_call.assert_called_once_with(
'pg_dump -Fc -Z 9 -d database -U foo -h localhost -p 5432 -f /tmp/dump.gz',
'pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', # noqa
shell=True,
)

@patch('pganonymize.utils.subprocess.call')
def test_with_password(self, mock_call):
create_database_dump(
'/tmp/dump.gz',
{'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432, 'password': 'pass'},
filename = '/tmp/dump.gz'
db_args = {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432, 'password': 'pass'}
dump_args = '--format custom --compress 9'
create_database_dump(filename, db_args, dump_args)
mock_call.assert_called_once_with(
'PGPASSWORD=pass pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', # noqa
shell=True,
)

@patch('pganonymize.utils.subprocess.call')
def test_with_custom_dump_args(self, mock_call):
filename = '/tmp/dump.gz'
db_args = {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432}
dump_args = '--format plain'
create_database_dump(filename, db_args, dump_args)
mock_call.assert_called_once_with(
'PGPASSWORD=pass pg_dump -Fc -Z 9 -d database -U foo -h localhost -p 5432 -f /tmp/dump.gz',
'pg_dump --format plain --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', # noqa
shell=True,
)