Skip to content

Commit 715aac3

Browse files
authored
Merge pull request #128 from zypp-io/development
Release 0.9.1
2 parents 3523ee6 + ed1dd4e commit 715aac3

File tree

9 files changed

+56 -55 lines changed

9 files changed

+56 -55 lines changed

.github/workflows/ci.yaml

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,12 @@ jobs:
1313
runs-on: ubuntu-latest
1414
strategy:
1515
matrix:
16-
python-version: [3.8, 3.9]
16+
python-version: ['3.10']
1717

1818
steps:
19-
- uses: actions/checkout@v2
19+
- uses: actions/checkout@v4
2020
- name: Set up Python ${{ matrix.python-version }}
21-
uses: actions/setup-python@v2
21+
uses: actions/setup-python@v5
2222
with:
2323
python-version: ${{ matrix.python-version }}
2424
- name: Install dependencies
@@ -28,12 +28,6 @@ jobs:
2828
- name: Run pre-commit
2929
run: |
3030
pre-commit run --all-files
31-
- name: assert equality between setup.cfg and requirements.txt
32-
uses: actions/checkout@v2
33-
- name: setup python
34-
uses: actions/setup-python@v2
35-
with:
36-
python-version: 3.8
3731
- name: execute py script
3832
run: |
3933
python ./scripts/check_setupcfg_and_requirements_equal.py

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,3 +144,6 @@ settings.yml
144144

145145
# mac
146146
.DS_Store
147+
148+
#VSCode
149+
.vscode/

df_to_azure/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from .export import df_to_azure
44

5-
__version__ = "0.9.0"
5+
__version__ = "0.9.1"
66

77
logging.basicConfig(
88
format="%(asctime)s.%(msecs)03d [%(levelname)-5s] [%(name)s] - %(message)s",

df_to_azure/db.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,9 @@
44

55
from sqlalchemy import create_engine
66
from sqlalchemy.exc import ProgrammingError
7+
from sqlalchemy.sql import text
78

8-
from df_to_azure.exceptions import UpsertError
9+
from df_to_azure.exceptions import DriverError, UpsertError
910

1011

1112
class SqlUpsert:
@@ -71,7 +72,15 @@ def create_stored_procedure(self):
7172
)
7273

7374

74-
def auth_azure(driver: str = "ODBC Driver 17 for SQL Server"):
75+
def auth_azure(driver: str = None):
76+
77+
if driver is None:
78+
import pyodbc
79+
80+
try:
81+
driver = pyodbc.drivers()[-1]
82+
except IndexError:
83+
raise DriverError("ODBC driver not found")
7584

7685
connection_string = "mssql+pyodbc://{}:{}@{}:1433/{}?driver={}".format(
7786
os.environ.get("SQL_USER"),
@@ -98,6 +107,5 @@ def execute_stmt(stmt: str):
98107
99108
"""
100109
with auth_azure() as con:
101-
t = con.begin()
102-
con.execute(stmt)
103-
t.commit()
110+
with con.begin():
111+
con.execute(text(stmt))

df_to_azure/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,3 +34,9 @@ class UpsertError(Exception):
3434
"""For the moment upsert gives an error"""
3535

3636
pass
37+
38+
39+
class DriverError(Exception):
40+
"""Can't find correct odbc driver"""
41+
42+
pass

df_to_azure/export.py

Lines changed: 21 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -7,19 +7,8 @@
77
import azure.core.exceptions
88
import pandas as pd
99
from azure.storage.blob import BlobServiceClient
10-
from numpy import dtype
11-
from pandas import (
12-
BooleanDtype,
13-
CategoricalDtype,
14-
DataFrame,
15-
DatetimeTZDtype,
16-
Float64Dtype,
17-
Int8Dtype,
18-
Int16Dtype,
19-
Int32Dtype,
20-
Int64Dtype,
21-
StringDtype,
22-
)
10+
from pandas import CategoricalDtype, DataFrame
11+
from pandas.api.types import is_bool_dtype, is_datetime64_any_dtype, is_float_dtype, is_integer_dtype, is_string_dtype
2312
from sqlalchemy.sql.visitors import VisitableType
2413
from sqlalchemy.types import BigInteger, Boolean, DateTime, Integer, Numeric, String
2514

@@ -234,29 +223,26 @@ def column_types(self) -> dict:
234223
"""
235224
string = String(length=self.text_length)
236225
numeric = Numeric(precision=18, scale=self.decimal_precision)
237-
type_conversion = {
238-
dtype("O"): string,
239-
StringDtype(): string,
240-
dtype("int64"): Integer(),
241-
dtype("int32"): Integer(),
242-
dtype("int16"): Integer(),
243-
dtype("int8"): Integer(),
244-
Int8Dtype(): Integer(),
245-
Int16Dtype(): Integer(),
246-
Int32Dtype(): Integer(),
247-
Int64Dtype(): Integer(),
248-
Float64Dtype(): numeric,
249-
dtype("float64"): numeric,
250-
dtype("float32"): numeric,
251-
dtype("float16"): numeric,
252-
dtype("<M8[ns]"): DateTime(),
253-
dtype("bool"): Boolean(),
254-
BooleanDtype(): Boolean(),
255-
DatetimeTZDtype(tz="utc"): DateTime(),
256-
CategoricalDtype(): string,
257-
}
258226

259-
col_types = {col_name: type_conversion[col_type] for col_name, col_type in self.df.dtypes.to_dict().items()}
227+
def convert_type(col_name, col_type):
228+
if is_string_dtype(col_type):
229+
return string
230+
elif is_bool_dtype(col_type):
231+
return Boolean()
232+
elif is_integer_dtype(col_type):
233+
return Integer()
234+
elif is_float_dtype(col_type):
235+
return numeric
236+
elif is_datetime64_any_dtype(col_type):
237+
return DateTime()
238+
elif isinstance(col_type, CategoricalDtype):
239+
return string
240+
else:
241+
raise ValueError(f"Column {col_name} has unknown dtype: {col_type}")
242+
243+
col_types = {
244+
col_name: convert_type(col_name, col_type) for col_name, col_type in self.df.dtypes.to_dict().items()
245+
}
260246

261247
return col_types
262248

df_to_azure/tests/test_general.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ def test_mapping_column_types():
4141
"Date": dr1,
4242
"Timedelta": dr1 - dr2,
4343
"Bool": [True, False, True],
44+
"Categorical": Series(["a", "b", "c"], dtype="category"),
4445
}
4546
)
4647
df_to_azure(
@@ -68,6 +69,7 @@ def test_mapping_column_types():
6869
"Date",
6970
"Timedelta",
7071
"Bool",
72+
"Categorical",
7173
],
7274
"DATA_TYPE": [
7375
"varchar",
@@ -83,9 +85,10 @@ def test_mapping_column_types():
8385
"datetime",
8486
"numeric",
8587
"bit",
88+
"varchar",
8689
],
87-
"CHARACTER_MAXIMUM_LENGTH": [255, 255, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
88-
"NUMERIC_PRECISION": [nan, nan, 10, 10, 10, 10, 10, 10, 18, 18, nan, 18, nan],
90+
"CHARACTER_MAXIMUM_LENGTH": [255, 255, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 255],
91+
"NUMERIC_PRECISION": [nan, nan, 10, 10, 10, 10, 10, 10, 18, 18, nan, 18, nan, nan],
8992
}
9093
)
9194

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,3 @@
1+
keyvault
12
pre-commit
23
pytest

setup.cfg

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
[metadata]
22
name = df_to_azure
3-
version = 0.9.0
4-
author = Melvin Folkers, Erfan Nariman
5-
author_email = melvin@zypp.io, erfan@zypp.io
3+
version = 0.9.1
4+
author = Zypp
5+
author_email = hello@zypp.io
66
description = Automatically write pandas DataFrames to SQL by creating pipelines in Azure Data Factory with copy activity from blob to SQL
77
long_description = file: README.md
88
long_description_content_type = text/markdown

0 commit comments

Comments
 (0)