
Commit 6fcb149

Bumping version to 0.0.9
1 parent 9c6d601 commit 6fcb149

10 files changed: +91 -94 lines changed


README.md

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 
 > Utility belt to handle data on AWS.
 
-[![Release](https://img.shields.io/badge/release-0.0.8-brightgreen.svg)](https://pypi.org/project/awswrangler/)
+[![Release](https://img.shields.io/badge/release-0.0.9-brightgreen.svg)](https://pypi.org/project/awswrangler/)
 [![Release](https://img.shields.io/pypi/dm/awswrangler.svg)](https://pypi.org/project/awswrangler/)
 [![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7-brightgreen.svg)](https://pypi.org/project/awswrangler/)
 [![Documentation Status](https://readthedocs.org/projects/aws-data-wrangler/badge/?version=latest)](https://aws-data-wrangler.readthedocs.io/en/latest/?badge=latest)

awswrangler/__version__.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
 __title__ = "awswrangler"
 __description__ = "Utility belt to handle data on AWS."
-__version__ = "0.0.8"
+__version__ = "0.0.9"
 __license__ = "Apache License 2.0"

awswrangler/pandas.py

Lines changed: 29 additions & 30 deletions

@@ -6,9 +6,9 @@
 import csv
 from datetime import datetime
 
-import pandas
-import pyarrow
-from pyarrow import parquet
+import pandas as pd
+import pyarrow as pa
+from pyarrow import parquet as pq
 
 from awswrangler import data_types
 from awswrangler.exceptions import (UnsupportedWriteMode,
@@ -239,21 +239,20 @@ def _read_csv_iterator(
             lineterminator=lineterminator)
         forgotten_bytes = len(body[last_char:])
 
-        df = pandas.read_csv(
-            StringIO(body[:last_char].decode("utf-8")),
-            header=header,
-            names=names,
-            usecols=usecols,
-            sep=sep,
-            quotechar=quotechar,
-            quoting=quoting,
-            escapechar=escapechar,
-            parse_dates=parse_dates,
-            infer_datetime_format=infer_datetime_format,
-            lineterminator=lineterminator,
-            dtype=dtype,
-            encoding=encoding,
-            converters=converters)
+        df = pd.read_csv(StringIO(body[:last_char].decode("utf-8")),
+                         header=header,
+                         names=names,
+                         usecols=usecols,
+                         sep=sep,
+                         quotechar=quotechar,
+                         quoting=quoting,
+                         escapechar=escapechar,
+                         parse_dates=parse_dates,
+                         infer_datetime_format=infer_datetime_format,
+                         lineterminator=lineterminator,
+                         dtype=dtype,
+                         encoding=encoding,
+                         converters=converters)
         yield df
         if count == 1:  # first chunk
             names = df.columns
@@ -402,7 +401,7 @@ def _read_csv_once(
             Key=key_path,
             Fileobj=buff)
         buff.seek(0),
-        dataframe = pandas.read_csv(
+        dataframe = pd.read_csv(
            buff,
            header=header,
            names=names,
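
For orientation, both read_csv hunks above only reroute the existing call through the new pd alias: a slice of the S3 object body is decoded and fed to pd.read_csv through an in-memory buffer. A minimal standalone sketch of that pattern (the byte string below is made up and stands in for body[:last_char]):

from io import StringIO

import pandas as pd

# Made-up chunk of a CSV body; in the library this slice comes from
# body[:last_char] of the S3 object being streamed.
body = b"id,name\n1,foo\n2,bar\n"

df = pd.read_csv(StringIO(body.decode("utf-8")),
                 header=0,
                 sep=",",
                 lineterminator="\n")
print(df)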
@@ -822,7 +821,7 @@ def _data_to_s3_object_writer(dataframe,
                                   extra_args=None,
                                   isolated_dataframe=False):
         fs = s3.get_fs(session_primitives=session_primitives)
-        fs = pyarrow.filesystem._ensure_filesystem(fs)
+        fs = pa.filesystem._ensure_filesystem(fs)
         s3.mkdir_if_not_exists(fs, path)
 
         if compression is None:
@@ -834,7 +833,7 @@ def _data_to_s3_object_writer(dataframe,
         else:
             raise InvalidCompression(compression)
 
-        guid = pyarrow.compat.guid()
+        guid = pa.compat.guid()
         if file_format == "parquet":
             outfile = f"{guid}.parquet{compression_end}"
         elif file_format == "csv":
@@ -905,9 +904,9 @@ def write_parquet_dataframe(dataframe, path, preserve_index, compression,
                 logger.debug(f"Casting column {name} Int64 to float64")
 
         # Converting Pandas Dataframe to Pyarrow's Table
-        table = pyarrow.Table.from_pandas(df=dataframe,
-                                          preserve_index=preserve_index,
-                                          safe=False)
+        table = pa.Table.from_pandas(df=dataframe,
+                                     preserve_index=preserve_index,
+                                     safe=False)
 
         # Casting on Pyarrow
         if cast_columns:
@@ -923,11 +922,11 @@ def write_parquet_dataframe(dataframe, path, preserve_index, compression,
 
         # Persisting on S3
         with fs.open(path, "wb") as f:
-            parquet.write_table(table,
-                                f,
-                                compression=compression,
-                                coerce_timestamps="ms",
-                                flavor="spark")
+            pq.write_table(table,
+                           f,
+                           compression=compression,
+                           coerce_timestamps="ms",
+                           flavor="spark")
 
         # Casting back on Pandas if necessary
         if isolated_dataframe is False:
@@ -1047,7 +1046,7 @@ def read_log_query(self,
                 col_name = col["field"]
                 new_row[col_name] = col["value"]
             pre_df.append(new_row)
-        return pandas.DataFrame(pre_df)
+        return pd.DataFrame(pre_df)
 
     @staticmethod
     def normalize_columns_names_athena(dataframe, inplace=True):
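
As a quick reference for the aliased calls above (pa.Table.from_pandas followed by pq.write_table), here is a minimal, self-contained sketch of the pandas-to-Parquet path; the sample frame is made up and a local path stands in for the S3 filesystem handle the library opens:

import pandas as pd
import pyarrow as pa
from pyarrow import parquet as pq

# Made-up frame standing in for the user's dataframe.
dataframe = pd.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})

# Convert to an Arrow Table without the pandas index, skipping safe casts,
# mirroring write_parquet_dataframe above.
table = pa.Table.from_pandas(df=dataframe, preserve_index=False, safe=False)

# Write Spark-flavored Parquet with millisecond timestamps; the library does
# this through an open S3 file handle instead of a local path.
pq.write_table(table,
               "example.parquet",
               compression="snappy",
               coerce_timestamps="ms",
               flavor="spark")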

awswrangler/spark.py

Lines changed: 2 additions & 2 deletions

@@ -1,6 +1,6 @@
 import logging
 
-import pandas
+import pandas as pd
 
 from pyspark.sql.functions import pandas_udf, PandasUDFType, spark_partition_id
 from pyspark.sql.types import TimestampType
@@ -107,7 +107,7 @@ def write(pandas_dataframe):
                 mode="append",
                 procs_cpu_bound=1,
                 cast_columns=casts)
-            return pandas.DataFrame.from_dict({"objects_paths": paths})
+            return pd.DataFrame.from_dict({"objects_paths": paths})
 
         df_objects_paths = dataframe.repartition(numPartitions=num_partitions) \
             .withColumn("aws_data_wrangler_internal_partition_id", spark_partition_id()) \
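
For context, write above is a grouped-map pandas UDF: each Spark partition is tagged with spark_partition_id(), grouped on that tag, written out, and the written object paths are returned as a one-column pandas DataFrame. A rough sketch of the same pattern on Spark 2.4, with toy data and a fake path in place of the real session.pandas.to_parquet call:

import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf, PandasUDFType, spark_partition_id
from pyspark.sql.types import StringType, StructField, StructType

spark = SparkSession.builder.getOrCreate()

# Toy dataframe standing in for the user's data.
df = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["id", "value"])

schema = StructType([StructField("objects_paths", StringType())])

@pandas_udf(schema, PandasUDFType.GROUPED_MAP)
def write(pandas_dataframe):
    # Stand-in for session.pandas.to_parquet(...): pretend one object was
    # written for this partition and report its (fake) path back.
    paths = ["s3://my-bucket/prefix/{}-rows.parquet".format(len(pandas_dataframe))]
    return pd.DataFrame.from_dict({"objects_paths": paths})

df_objects_paths = df \
    .withColumn("aws_data_wrangler_internal_partition_id", spark_partition_id()) \
    .groupby("aws_data_wrangler_internal_partition_id") \
    .apply(write)
print(df_objects_paths.toPandas())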

requirements-dev.txt

Lines changed: 8 additions & 8 deletions

@@ -1,8 +1,8 @@
-yapf>=0.28.0
-flake8>=3.7.8
-pytest>=5.1.0
-cfn-lint>=0.23.3
-twine>=1.13.0
-pyspark>=2.4.3
-wheel>=0.33.6
-sphinx>=2.1.2
+yapf~=0.28.0
+flake8~=3.7.8
+pytest~=5.1.0
+cfn-lint~=0.23.3
+twine~=1.13.0
+pyspark~=2.4.3
+wheel~=0.33.6
+sphinx~=2.1.2

requirements.txt

Lines changed: 7 additions & 7 deletions

@@ -1,7 +1,7 @@
-botocore>=1.12.238
-boto3>=1.9.238
-pandas>=0.25.1
-s3fs>=0.3.4
-pyarrow>=0.14.1
-tenacity>=5.1.1
-pg8000>=1.13.2
+botocore~=1.12.239
+boto3~=1.9.239
+pandas~=0.25.1
+s3fs~=0.3.4
+pyarrow~=0.14.0
+tenacity~=5.1.1
+pg8000~=1.13.2

setup.py

Lines changed: 7 additions & 7 deletions

@@ -22,12 +22,12 @@
         exclude=["tests"]),
     python_requires=">=3.6",
     install_requires=[
-        "pyarrow>=0.14.0",
-        "pandas>=0.25.1",
-        "botocore>=1.12.239",
-        "boto3>=1.9.239",
-        "s3fs>=0.3.4",
-        "tenacity>=5.1.1",
-        "pg8000>=1.13.2",
+        "pyarrow~=0.14.0",
+        "pandas~=0.25.1",
+        "botocore~=1.12.239",
+        "boto3~=1.9.239",
+        "s3fs~=0.3.4",
+        "tenacity~=5.1.1",
+        "pg8000~=1.13.2",
     ],
 )
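
The switch from >= to ~= across requirements-dev.txt, requirements.txt, and setup.py tightens the pins to PEP 440 compatible releases: pandas~=0.25.1 means ">=0.25.1, <0.26.0", so patch upgrades are still picked up while minor and major bumps are excluded, whereas the previous pandas>=0.25.1 accepted any newer version. A small illustrative check with the packaging library (which pip vendors):

from packaging.specifiers import SpecifierSet

# "~=0.25.1" is shorthand for ">=0.25.1, ==0.25.*"
spec = SpecifierSet("~=0.25.1")
print("0.25.3" in spec)  # True  - patch releases remain allowed
print("0.26.0" in spec)  # False - minor bumps are now excluded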

testing/Dockerfile

Lines changed: 3 additions & 5 deletions

@@ -1,6 +1,6 @@
 FROM openjdk:8-jre-stretch
 
-ARG SPARK_VERSION=2.4.3
+ARG SPARK_VERSION=2.4.4
 
 RUN apt-get update -y
 RUN apt-get install -y jq make build-essential libssl-dev zlib1g-dev libbz2-dev \
@@ -22,13 +22,11 @@ RUN eval "$(pyenv init -)" && \
     curl --url "http://central.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar" --output ${SPARK_HOME}/jars/hadoop-aws-2.7.3.jar && \
     mkdir -p ${SPARK_HOME}/conf && \
     echo spark.hadoop.fs.s3.impl=org.apache.hadoop.fs.s3a.S3AFileSystem >> ${SPARK_HOME}/conf/spark-defaults.conf
-RUN $PIP install aws-sam-cli
-RUN $PIP install awscli
 ADD requirements.txt /root/
-RUN $PIP install -r /root/requirements.txt
+RUN $PIP install --upgrade -r /root/requirements.txt
 RUN rm -rf /root/requirements.txt
 ADD requirements-dev.txt /root/
-RUN $PIP install -r /root/requirements-dev.txt
+RUN $PIP install --upgrade -r /root/requirements-dev.txt
 RUN rm -rf /root/requirements-dev.txt
 
 ENTRYPOINT ["/bin/sh"]

testing/test_awswrangler/test_glue.py

Lines changed: 2 additions & 2 deletions

@@ -2,7 +2,7 @@
 
 import pytest
 import boto3
-import pandas
+import pandas as pd
 
 from awswrangler import Session
 
@@ -53,7 +53,7 @@ def table(
     bucket,
     database,
 ):
-    dataframe = pandas.read_csv("data_samples/micro.csv")
+    dataframe = pd.read_csv("data_samples/micro.csv")
     path = f"s3://{bucket}/test/"
     table = "test"
     session.pandas.to_parquet(dataframe=dataframe,
