
Commit 54a0d2c

Bumping version to 0.0.3

1 parent 6af5c7c

5 files changed: 35 additions & 17 deletions


awswrangler/__version__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 __title__ = "awswrangler"
 __description__ = "Utility belt to handle data on AWS."
-__version__ = "0.0.2"
+__version__ = "0.0.3"
 __license__ = "Apache License 2.0"

awswrangler/glue.py

Lines changed: 18 additions & 8 deletions
@@ -203,17 +203,19 @@ def create_table(self,
                                TableInput=table_input)
 
     def add_partitions(self, database, table, partition_paths, file_format,
-                       extra_args):
+                       compression, extra_args):
         if not partition_paths:
             return None
         partitions = list()
         for partition in partition_paths:
             if file_format == "parquet":
                 partition_def = Glue.parquet_partition_definition(
-                    partition=partition)
+                    partition=partition, compression=compression)
             elif file_format == "csv":
                 partition_def = Glue.csv_partition_definition(
-                    partition=partition, extra_args=extra_args)
+                    partition=partition,
+                    compression=compression,
+                    extra_args=extra_args)
             else:
                 raise UnsupportedFileFormat(file_format)
             partitions.append(partition_def)

@@ -225,8 +227,12 @@ def add_partitions(self, database, table, partition_paths, file_format,
                 DatabaseName=database,
                 TableName=table,
                 PartitionInputList=page)
-            if len(res["Errors"]) > 0:
-                raise ApiError(f"{res['Errors'][0]}")
+            for error in res["Errors"]:
+                if "ErrorDetail" in error:
+                    if "ErrorCode" in error["ErrorDetail"]:
+                        if error["ErrorDetail"][
+                                "ErrorCode"] != "AlreadyExistsException":
+                            raise ApiError(f"{error}")
 
     def get_connection_details(self, name):
         return self._client_glue.get_connection(

@@ -355,7 +361,7 @@ def csv_table_definition(table, partition_cols_schema, schema, path,
             "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
             "OutputFormat":
             "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
-            "Compressed": True,
+            "Compressed": compressed,
             "NumberOfBuckets": -1,
             "SerdeInfo": {
                 "Parameters": param,

@@ -375,7 +381,8 @@ def csv_table_definition(table, partition_cols_schema, schema, path,
         }
 
     @staticmethod
-    def csv_partition_definition(partition, extra_args):
+    def csv_partition_definition(partition, compression, extra_args):
+        compressed = False if compression is None else True
         sep = extra_args["sep"] if "sep" in extra_args else ","
         serde = extra_args.get("serde")
         if serde == "OpenCSVSerDe":

@@ -394,6 +401,7 @@ def csv_partition_definition(partition, extra_args):
         "StorageDescriptor": {
             "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
             "Location": partition[0],
+            "Compressed": compressed,
             "SerdeInfo": {
                 "Parameters": param,
                 "SerializationLibrary": serde_fullname,

@@ -454,11 +462,13 @@ def parquet_table_definition(table, partition_cols_schema, schema, path,
         }
 
     @staticmethod
-    def parquet_partition_definition(partition):
+    def parquet_partition_definition(partition, compression):
+        compressed = False if compression is None else True
         return {
             "StorageDescriptor": {
                 "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
                 "Location": partition[0],
+                "Compressed": compressed,
                 "SerdeInfo": {
                     "Parameters": {
                         "serialization.format": "1"

awswrangler/pandas.py

Lines changed: 15 additions & 3 deletions
@@ -433,10 +433,22 @@ def read_sql_athena(self,
                             parse_dates=parse_timestamps,
                             quoting=csv.QUOTE_ALL,
                             max_result_size=max_result_size)
-        if len(ret.index) > 0:
+        if max_result_size is None:
+            if len(ret.index) > 0:
+                for col in parse_dates:
+                    ret[col] = ret[col].dt.date
+            return ret
+        else:
+            return Pandas._apply_dates_to_generator(
+                generator=ret, parse_dates=parse_dates)
+
+    @staticmethod
+    def _apply_dates_to_generator(generator, parse_dates):
+        for df in generator:
+            if len(df.index) > 0:
                 for col in parse_dates:
-                ret[col] = ret[col].dt.date
-        return ret
+                    df[col] = df[col].dt.date
+            yield df
 
     def to_csv(
             self,
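
The pandas.py change handles the two shapes read_csv can return: with max_result_size unset it is a single DataFrame and the timestamp-to-date coercion runs eagerly, while with max_result_size set it is a generator of DataFrame chunks, so the coercion must itself be a generator to stay lazy. A minimal sketch of the same pattern, with a hypothetical apply_dates helper standing in for _apply_dates_to_generator:

import pandas as pd

def apply_dates(frames, date_cols):
    # Convert parsed timestamp columns to plain dates one chunk at a
    # time, so large result sets are never materialized all at once.
    for df in frames:
        if len(df.index) > 0:
            for col in date_cols:
                df[col] = df[col].dt.date
        yield df

chunks = iter([pd.DataFrame({"day": pd.to_datetime(["2019-01-01"])})])
for df in apply_dates(chunks, ["day"]):
    print(df["day"].iloc[0])  # prints 2019-01-01, now a datetime.date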

building/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-FROM lambci/lambda:build-python3.7
+FROM lambci/lambda:build-python3.6
 
 RUN pip install --upgrade pip
 
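
The lambci/lambda build images replicate the AWS Lambda build environment for a given runtime, so this pin presumably moves the packaging environment from Python 3.7 to Python 3.6 to match the Lambda runtime the layer targets.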

testing/test_awswrangler/test_pandas.py

Lines changed: 0 additions & 4 deletions
@@ -241,8 +241,6 @@ def test_to_s3(
             list(dataframe2.columns))
     else:
         assert len(list(dataframe.columns)) == len(list(dataframe2.columns))
-    assert dataframe[dataframe["id"] == 0].iloc[0]["name"] == dataframe2[
-        dataframe2["id"] == 0].iloc[0]["name"]
 
 
 def test_to_parquet_with_cast(

@@ -594,8 +592,6 @@ def test_to_csv_with_sep(
     sleep(2)
     assert len(dataframe.index) == len(dataframe2.index)
     assert len(list(dataframe.columns)) == len(list(dataframe2.columns))
-    assert dataframe[dataframe["id"] == 0].iloc[0]["name"] == dataframe2[
-        dataframe2["id"] == 0].iloc[0]["name"]
 
 
 def test_to_csv_serde_exception(
