Skip to content

Commit 936b753

Browse files
authored
Merge pull request #127 from dathere/copy_readbuffer_size_setting
make COPY_READBUFFER_SIZE a configurable parameter
2 parents 07e3170 + 691247a commit 936b753

File tree

3 files changed

+8
-1
lines changed

3 files changed

+8
-1
lines changed

datapusher/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class DataPusherPlusConfig(MutableMapping):
3535
# ckan_service_provider settings
3636
SQLALCHEMY_DATABASE_URI: str = _DATABASE_URI
3737
WRITE_ENGINE_URL: str = _WRITE_ENGINE_URL
38+
COPY_READBUFFER_SIZE: int = 1048576
3839
DEBUG: bool = False
3940
TESTING: bool = False
4041
SECRET_KEY: str = str(uuid.uuid4())

datapusher/dot-env.template

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ WRITE_ENGINE_URL = 'postgresql://datapusher:YOURPASSWORD@localhost/datastore_def
1313
# The connect string of the Datapusher+ Job database
1414
SQLALCHEMY_DATABASE_URI = 'postgresql://datapusher_jobs:YOURPASSWORD@localhost/datapusher_jobs'
1515

16+
# Read buffer size, in bytes, used when reading the CSV file for Postgres COPY
17+
# default: 1 MiB = 1048576 bytes
18+
COPY_READBUFFER_SIZE = 1048576
19+
1620
# =============== DOWNLOAD SETTINGS ==============
1721
# 25mb, this is ignored if PREVIEW_ROWS > 0
1822
MAX_CONTENT_LENGTH = 25600000

datapusher/jobs.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1417,6 +1417,7 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
14171417
except psycopg2.Error as e:
14181418
raise util.JobError("Could not connect to the Datastore: {}".format(e))
14191419
else:
1420+
copy_readbuffer_size = config.get("COPY_READBUFFER_SIZE")
14201421
cur = raw_connection.cursor()
14211422
"""
14221423
truncate table to use copy freeze option and further increase
@@ -1441,7 +1442,8 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None):
14411442
sql.Identifier(resource_id),
14421443
column_names,
14431444
)
1444-
with open(tmp, "rb", 8192) as f:
1445+
# use the configured COPY_READBUFFER_SIZE (default 1MB) as the buffer size for COPY read from disk
1446+
with open(tmp, "rb", copy_readbuffer_size) as f:
14451447
try:
14461448
cur.copy_expert(copy_sql, f)
14471449
except psycopg2.Error as e:

0 commit comments

Comments
 (0)