
Commit 6fcb149

Bumping version to 0.0.9
1 parent 9c6d601 commit 6fcb149

10 files changed: +91 -94 lines changed


README.md

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 
 > Utility belt to handle data on AWS.
 
-[![Release](https://img.shields.io/badge/release-0.0.8-brightgreen.svg)](https://pypi.org/project/awswrangler/)
+[![Release](https://img.shields.io/badge/release-0.0.9-brightgreen.svg)](https://pypi.org/project/awswrangler/)
 [![Release](https://img.shields.io/pypi/dm/awswrangler.svg)](https://pypi.org/project/awswrangler/)
 [![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7-brightgreen.svg)](https://pypi.org/project/awswrangler/)
 [![Documentation Status](https://readthedocs.org/projects/aws-data-wrangler/badge/?version=latest)](https://aws-data-wrangler.readthedocs.io/en/latest/?badge=latest)

awswrangler/__version__.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
 __title__ = "awswrangler"
 __description__ = "Utility belt to handle data on AWS."
-__version__ = "0.0.8"
+__version__ = "0.0.9"
 __license__ = "Apache License 2.0"

awswrangler/pandas.py

Lines changed: 29 additions & 30 deletions

@@ -6,9 +6,9 @@
 import csv
 from datetime import datetime
 
-import pandas
-import pyarrow
-from pyarrow import parquet
+import pandas as pd
+import pyarrow as pa
+from pyarrow import parquet as pq
 
 from awswrangler import data_types
 from awswrangler.exceptions import (UnsupportedWriteMode,
@@ -239,21 +239,20 @@ def _read_csv_iterator(
             lineterminator=lineterminator)
         forgotten_bytes = len(body[last_char:])
 
-        df = pandas.read_csv(
-            StringIO(body[:last_char].decode("utf-8")),
-            header=header,
-            names=names,
-            usecols=usecols,
-            sep=sep,
-            quotechar=quotechar,
-            quoting=quoting,
-            escapechar=escapechar,
-            parse_dates=parse_dates,
-            infer_datetime_format=infer_datetime_format,
-            lineterminator=lineterminator,
-            dtype=dtype,
-            encoding=encoding,
-            converters=converters)
+        df = pd.read_csv(StringIO(body[:last_char].decode("utf-8")),
+                         header=header,
+                         names=names,
+                         usecols=usecols,
+                         sep=sep,
+                         quotechar=quotechar,
+                         quoting=quoting,
+                         escapechar=escapechar,
+                         parse_dates=parse_dates,
+                         infer_datetime_format=infer_datetime_format,
+                         lineterminator=lineterminator,
+                         dtype=dtype,
+                         encoding=encoding,
+                         converters=converters)
         yield df
         if count == 1:  # first chunk
             names = df.columns
@@ -402,7 +401,7 @@ def _read_csv_once(
             Key=key_path,
             Fileobj=buff)
         buff.seek(0),
-        dataframe = pandas.read_csv(
+        dataframe = pd.read_csv(
            buff,
            header=header,
            names=names,
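
For orientation, both read_csv hunks above only reroute the existing call through the new pd alias: a slice of the S3 object body is decoded and fed to pd.read_csv through an in-memory buffer. A minimal standalone sketch of that pattern (the byte string below is made up and stands in for body[:last_char]):

from io import StringIO

import pandas as pd

# Made-up chunk of a CSV body; in the library this slice comes from
# body[:last_char] of the S3 object being streamed.
body = b"id,name\n1,foo\n2,bar\n"

df = pd.read_csv(StringIO(body.decode("utf-8")),
                 header=0,
                 sep=",",
                 lineterminator="\n")
print(df)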
@@ -822,7 +821,7 @@ def _data_to_s3_object_writer(dataframe,
                                   extra_args=None,
                                   isolated_dataframe=False):
         fs = s3.get_fs(session_primitives=session_primitives)
-        fs = pyarrow.filesystem._ensure_filesystem(fs)
+        fs = pa.filesystem._ensure_filesystem(fs)
         s3.mkdir_if_not_exists(fs, path)
 
         if compression is None:
@@ -834,7 +833,7 @@ def _data_to_s3_object_writer(dataframe,
         else:
             raise InvalidCompression(compression)
 
-        guid = pyarrow.compat.guid()
+        guid = pa.compat.guid()
         if file_format == "parquet":
             outfile = f"{guid}.parquet{compression_end}"
         elif file_format == "csv":
@@ -905,9 +904,9 @@ def write_parquet_dataframe(dataframe, path, preserve_index, compression,
                 logger.debug(f"Casting column {name} Int64 to float64")
 
         # Converting Pandas Dataframe to Pyarrow's Table
-        table = pyarrow.Table.from_pandas(df=dataframe,
-                                          preserve_index=preserve_index,
-                                          safe=False)
+        table = pa.Table.from_pandas(df=dataframe,
+                                     preserve_index=preserve_index,
+                                     safe=False)
 
         # Casting on Pyarrow
         if cast_columns:
@@ -923,11 +922,11 @@ def write_parquet_dataframe(dataframe, path, preserve_index, compression,
 
         # Persisting on S3
         with fs.open(path, "wb") as f:
-            parquet.write_table(table,
-                                f,
-                                compression=compression,
-                                coerce_timestamps="ms",
-                                flavor="spark")
+            pq.write_table(table,
+                           f,
+                           compression=compression,
+                           coerce_timestamps="ms",
+                           flavor="spark")
 
         # Casting back on Pandas if necessary
         if isolated_dataframe is False:
@@ -1047,7 +1046,7 @@ def read_log_query(self,
                 col_name = col["field"]
                 new_row[col_name] = col["value"]
             pre_df.append(new_row)
-        return pandas.DataFrame(pre_df)
+        return pd.DataFrame(pre_df)
 
     @staticmethod
     def normalize_columns_names_athena(dataframe, inplace=True):
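
As a quick reference for the aliased calls above (pa.Table.from_pandas followed by pq.write_table), here is a minimal, self-contained sketch of the pandas-to-Parquet path; the sample frame is made up and a local path stands in for the S3 filesystem handle the library opens:

import pandas as pd
import pyarrow as pa
from pyarrow import parquet as pq

# Made-up frame standing in for the user's dataframe.
dataframe = pd.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})

# Convert to an Arrow Table without the pandas index, skipping safe casts,
# mirroring write_parquet_dataframe above.
table = pa.Table.from_pandas(df=dataframe, preserve_index=False, safe=False)

# Write Spark-flavored Parquet with millisecond timestamps; the library does
# this through an open S3 file handle instead of a local path.
pq.write_table(table,
               "example.parquet",
               compression="snappy",
               coerce_timestamps="ms",
               flavor="spark")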

awswrangler/spark.py

Lines changed: 2 additions & 2 deletions

@@ -1,6 +1,6 @@
 import logging
 
-import pandas
+import pandas as pd
 
 from pyspark.sql.functions import pandas_udf, PandasUDFType, spark_partition_id
 from pyspark.sql.types import TimestampType
@@ -107,7 +107,7 @@ def write(pandas_dataframe):
                 mode="append",
                 procs_cpu_bound=1,
                 cast_columns=casts)
-            return pandas.DataFrame.from_dict({"objects_paths": paths})
+            return pd.DataFrame.from_dict({"objects_paths": paths})
 
         df_objects_paths = dataframe.repartition(numPartitions=num_partitions) \
             .withColumn("aws_data_wrangler_internal_partition_id", spark_partition_id()) \
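
For context, write above is a grouped-map pandas UDF: each Spark partition is tagged with spark_partition_id(), grouped on that tag, written out, and the written object paths are returned as a one-column pandas DataFrame. A rough sketch of the same pattern on Spark 2.4, with toy data and a fake path in place of the real session.pandas.to_parquet call:

import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf, PandasUDFType, spark_partition_id
from pyspark.sql.types import StringType, StructField, StructType

spark = SparkSession.builder.getOrCreate()

# Toy dataframe standing in for the user's data.
df = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["id", "value"])

schema = StructType([StructField("objects_paths", StringType())])

@pandas_udf(schema, PandasUDFType.GROUPED_MAP)
def write(pandas_dataframe):
    # Stand-in for session.pandas.to_parquet(...): pretend one object was
    # written for this partition and report its (fake) path back.
    paths = ["s3://my-bucket/prefix/{}-rows.parquet".format(len(pandas_dataframe))]
    return pd.DataFrame.from_dict({"objects_paths": paths})

df_objects_paths = df \
    .withColumn("aws_data_wrangler_internal_partition_id", spark_partition_id()) \
    .groupby("aws_data_wrangler_internal_partition_id") \
    .apply(write)
print(df_objects_paths.toPandas())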

requirements-dev.txt

Lines changed: 8 additions & 8 deletions

@@ -1,8 +1,8 @@
-yapf>=0.28.0
-flake8>=3.7.8
-pytest>=5.1.0
-cfn-lint>=0.23.3
-twine>=1.13.0
-pyspark>=2.4.3
-wheel>=0.33.6
-sphinx>=2.1.2
+yapf~=0.28.0
+flake8~=3.7.8
+pytest~=5.1.0
+cfn-lint~=0.23.3
+twine~=1.13.0
+pyspark~=2.4.3
+wheel~=0.33.6
+sphinx~=2.1.2

requirements.txt

Lines changed: 7 additions & 7 deletions

@@ -1,7 +1,7 @@
-botocore>=1.12.238
-boto3>=1.9.238
-pandas>=0.25.1
-s3fs>=0.3.4
-pyarrow>=0.14.1
-tenacity>=5.1.1
-pg8000>=1.13.2
+botocore~=1.12.239
+boto3~=1.9.239
+pandas~=0.25.1
+s3fs~=0.3.4
+pyarrow~=0.14.0
+tenacity~=5.1.1
+pg8000~=1.13.2

setup.py

Lines changed: 7 additions & 7 deletions

@@ -22,12 +22,12 @@
         exclude=["tests"]),
     python_requires=">=3.6",
     install_requires=[
-        "pyarrow>=0.14.0",
-        "pandas>=0.25.1",
-        "botocore>=1.12.239",
-        "boto3>=1.9.239",
-        "s3fs>=0.3.4",
-        "tenacity>=5.1.1",
-        "pg8000>=1.13.2",
+        "pyarrow~=0.14.0",
+        "pandas~=0.25.1",
+        "botocore~=1.12.239",
+        "boto3~=1.9.239",
+        "s3fs~=0.3.4",
+        "tenacity~=5.1.1",
+        "pg8000~=1.13.2",
     ],
 )
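
The switch from >= to ~= across requirements-dev.txt, requirements.txt, and setup.py tightens the pins to PEP 440 compatible releases: pandas~=0.25.1 means ">=0.25.1, <0.26.0", so patch upgrades are still picked up while minor and major bumps are excluded, whereas the previous pandas>=0.25.1 accepted any newer version. A small illustrative check with the packaging library (which pip vendors):

from packaging.specifiers import SpecifierSet

# "~=0.25.1" is shorthand for ">=0.25.1, ==0.25.*"
spec = SpecifierSet("~=0.25.1")
print("0.25.3" in spec)  # True  - patch releases remain allowed
print("0.26.0" in spec)  # False - minor bumps are now excluded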

testing/Dockerfile

Lines changed: 3 additions & 5 deletions

@@ -1,6 +1,6 @@
 FROM openjdk:8-jre-stretch
 
-ARG SPARK_VERSION=2.4.3
+ARG SPARK_VERSION=2.4.4
 
 RUN apt-get update -y
 RUN apt-get install -y jq make build-essential libssl-dev zlib1g-dev libbz2-dev \
@@ -22,13 +22,11 @@ RUN eval "$(pyenv init -)" && \
     curl --url "http://central.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar" --output ${SPARK_HOME}/jars/hadoop-aws-2.7.3.jar && \
     mkdir -p ${SPARK_HOME}/conf && \
     echo spark.hadoop.fs.s3.impl=org.apache.hadoop.fs.s3a.S3AFileSystem >> ${SPARK_HOME}/conf/spark-defaults.conf
-RUN $PIP install aws-sam-cli
-RUN $PIP install awscli
 ADD requirements.txt /root/
-RUN $PIP install -r /root/requirements.txt
+RUN $PIP install --upgrade -r /root/requirements.txt
 RUN rm -rf /root/requirements.txt
 ADD requirements-dev.txt /root/
-RUN $PIP install -r /root/requirements-dev.txt
+RUN $PIP install --upgrade -r /root/requirements-dev.txt
 RUN rm -rf /root/requirements-dev.txt
 
 ENTRYPOINT ["/bin/sh"]

testing/test_awswrangler/test_glue.py

Lines changed: 2 additions & 2 deletions

@@ -2,7 +2,7 @@
 
 import pytest
 import boto3
-import pandas
+import pandas as pd
 
 from awswrangler import Session
 
@@ -53,7 +53,7 @@ def table(
     bucket,
     database,
 ):
-    dataframe = pandas.read_csv("data_samples/micro.csv")
+    dataframe = pd.read_csv("data_samples/micro.csv")
     path = f"s3://{bucket}/test/"
     table = "test"
     session.pandas.to_parquet(dataframe=dataframe,
