
Commit 77b6a08

Add ruff, remove isort, and add more pre-commits (#237)
* add ruff
* release notes

1 parent 2dbfc4b · commit 77b6a08

21 files changed (+107, -73 lines)

.flake8

Lines changed: 0 additions & 7 deletions
This file was deleted.

.pre-commit-config.yaml

Lines changed: 27 additions & 12 deletions
@@ -1,19 +1,34 @@
+exclude: |
+    (?x)
+    ^nlp_primitives/data/nltk-data/|
+    .html$|.csv$|.svg$|.md$|.txt$|.json$|.xml$|.pickle$|^.github/|
+    (LICENSE.*|README.*)
 default_stages: [commit]
-exclude: ^LICENSE/|\.(html|csv|svg|md|txt|json|tab|bib|adv)$
 repos:
-  - repo: https://github.com/MarcoGorelli/absolufy-imports
-    rev: v0.3.1
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: 'v4.3.0'
     hooks:
-      - id: absolufy-imports
-        files: ^nlp_primitives/
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.0.4
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/abravalheri/validate-pyproject
+    rev: 'v0.10.1'
     hooks:
-      - id: isort
-        args: [--settings-path=pyproject.toml]
+      - id: validate-pyproject
+  - repo: https://github.com/asottile/add-trailing-comma
+    rev: 'v2.2.3'
+    hooks:
+      - id: add-trailing-comma
+        name: Add trailing comma
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: 'v0.0.191'
+    hooks:
+      - id: ruff
+        args: ["--fix"]
   - repo: https://github.com/python/black
-    rev: 22.8.0
+    rev: 22.12.0
     hooks:
       - id: black
-        args: [--target-version=py310]
-        types_or: [python]
+        args: [--preview]
+        additional_dependencies: [".[jupyter]"]
+        types_or: [python, jupyter]
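
For reference, the updated hook set can be exercised locally with the standard pre-commit CLI (a usage sketch, assuming pre-commit is installed in the development environment; these commands are not part of the diff):

    pre-commit install          # register the hooks so they run on every commit
    pre-commit run --all-files  # run all configured hooks once across the repository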

LICENSE

Lines changed: 1 addition & 1 deletion
@@ -26,4 +26,4 @@ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

(Whitespace-only change; the visible text of the last line is unchanged.)

Makefile

Lines changed: 4 additions & 5 deletions
@@ -18,14 +18,13 @@ clean:

 .PHONY: lint
 lint:
-	isort --check-only nlp_primitives
-	black nlp_primitives -t py310 --check
-	flake8 nlp_primitives
+	black . --check --preview
+	ruff .

 .PHONY: lint-fix
 lint-fix:
-	black -t py310 nlp_primitives
-	isort nlp_primitives
+	black . --preview
+	ruff . --fix

 .PHONY: test
 test:
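
The lint targets above remain the local entry points, now delegating formatting to black and lint rules to ruff; a usage sketch, assuming both tools are installed as dev dependencies:

    make lint      # check only: black . --check --preview, then ruff .
    make lint-fix  # apply fixes: black . --preview, then ruff . --fix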

nlp_primitives/lsa.py

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ def __init__(self, random_seed=0, corpus=None, algorithm="randomized"):
         self.algorithm = algorithm
         if self.algorithm not in ["randomized", "arpack"]:
             raise ValueError(
-                "TruncatedSVD algorithm must be either 'randomized' or 'arpack'"
+                "TruncatedSVD algorithm must be either 'randomized' or 'arpack'",
             )

     def _create_trainer(self):

nlp_primitives/part_of_speech_count.py

Lines changed: 0 additions & 1 deletion
@@ -38,7 +38,6 @@ def __init__(self):
         self.n = 15

     def get_function(self):
-
         # For more info about the different parts of speech, see here: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
         types = [
             "C",  # cardinal digits

nlp_primitives/tensorflow/elmo.py

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ def elmo(col):
                 [
                     tf.compat.v1.global_variables_initializer(),
                     tf.compat.v1.tables_initializer(),
-                ]
+                ],
             )
             embeddings = session.run(self.embed(col.tolist()))
             return embeddings.transpose()

nlp_primitives/tensorflow/universal_sentence_encoder.py

Lines changed: 5 additions & 2 deletions
@@ -29,7 +29,10 @@ class UniversalSentenceEncoder(TransformPrimitive):
     return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})

     def __init__(self):
-        message = "In order to use the UniversalSentenceEncoder primitive install 'nlp_primitives[complete]'"
+        message = (
+            "In order to use the UniversalSentenceEncoder primitive install"
+            " 'nlp_primitives[complete]'"
+        )
         self.tf = import_or_raise("tensorflow", message)
         hub = import_or_raise("tensorflow_hub", message)
         self.tf.compat.v1.disable_eager_execution()
@@ -45,7 +48,7 @@ def universal_sentence_encoder(col):
                 [
                     self.tf.compat.v1.global_variables_initializer(),
                     self.tf.compat.v1.tables_initializer(),
-                ]
+                ],
             )
             embeddings = session.run(self.embed(col.tolist()))
             return embeddings.transpose()

nlp_primitives/tests/test_diversity_score.py

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ def test_primitive_func_1(self):
                 "Not diverse not diverse not",
                 "this is a semi diverse diverse example",
                 "a a",
-            ]
+            ],
         )
         primitive_instance = self.primitive()
         primitive_func = primitive_instance.get_function()

nlp_primitives/tests/test_elmo.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ def test_regular(self):
                 "The roller coaster was built in 1885.",
                 "When will humans go to mars?",
                 "Mitochondria is the powerhouse of the cell",
-            ]
+            ],
         )
         new_results = primitive(words)
         assert round(sum(new_results[:, 0]), 3) == 8.744

nlp_primitives/tests/test_lsa.py

Lines changed: 12 additions & 6 deletions
@@ -21,15 +21,15 @@ def test_strings(self):
                 "She ate a pineapple",
                 "Consume Electrolytes, he told me.",
                 "Hello",
-            ]
+            ],
         )
         primitive_func = self.primitive().get_function()

         answers = pd.Series(
             [
                 [2.41e-03, 6.29e-04, 7.26e-03, -1.85e-19],
                 [1.28e-03, 5.51e-04, 5.37e-03, -1.20e-15],
-            ]
+            ],
         )
         results = primitive_func(x)
         np.testing.assert_array_almost_equal(
@@ -45,7 +45,7 @@ def test_strings_custom_corpus(self):
                 "She ate a pineapple",
                 "Consume Electrolytes, he told me.",
                 "Hello",
-            ]
+            ],
         )
         # Create a new corpus using only the first 10000 elements from Gutenberg
         gutenberg = nltk.corpus.gutenberg.sents()
@@ -62,7 +62,7 @@
                     1.56e-03,
                     0.0,
                 ],
-            ]
+            ],
         )
         results = primitive_func(x)
         np.testing.assert_array_almost_equal(
@@ -96,7 +96,11 @@ def test_with_featuretools(self, es):
         primitive_instance = self.primitive()
         transform.append(primitive_instance)
         valid_dfs(
-            es, aggregation, transform, self.primitive.name.upper(), multi_output=True
+            es,
+            aggregation,
+            transform,
+            self.primitive.name.upper(),
+            multi_output=True,
         )

     def test_bad_algorithm_input_value(self):
@@ -120,6 +124,8 @@ def test_args_strings(self):

         # Test all args
         args_string = self.primitive(
-            random_seed=100, corpus=custom_corpus, algorithm="arpack"
+            random_seed=100,
+            corpus=custom_corpus,
+            algorithm="arpack",
         ).get_args_string()
         assert args_string == ", random_seed=100, corpus=user_defined, algorithm=arpack"

nlp_primitives/tests/test_mean_characters_per_sentence.py

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ def test_sentences(self):
                 "Ab. Bb. Db.",
                 "And? Why! Box. Car? Rat.",
                 "Yep.",
-            ]
+            ],
         )
         primitive_func = self.primitive().get_function()
         answers = pd.Series([3.0, 4.0, 4.0])

nlp_primitives/tests/test_number_of_sentences.py

Lines changed: 2 additions & 2 deletions
@@ -18,7 +18,7 @@ def test_regular_input(self):
                 "Hello. Hello! Hello? Hello.",
                 "and?",
                 "yes no",
-            ]
+            ],
         )
         expected = [4.0, 1.0, 1.0]
         actual = self.primitive().get_function()(x)
@@ -34,7 +34,7 @@ def test_multiline(self):
         x = pd.Series(
             [
                 "Yes\n, this is true!",
-            ]
+            ],
         )

         expected = [1.0]

nlp_primitives/tests/test_part_of_speech_count.py

Lines changed: 8 additions & 4 deletions
@@ -14,7 +14,7 @@ class TestPartOfSpeechCount(PrimitiveT):

     def test_strings(self):
         x = pd.Series(
-            ["This IS a STRING.", "Testing AAA", "Testing AAA-BBB", "Testing AA3"]
+            ["This IS a STRING.", "Testing AAA", "Testing AAA-BBB", "Testing AA3"],
         )
         primitive_func = self.primitive().get_function()

@@ -35,7 +35,7 @@ def test_strings(self):
                 [0.0, 0.0, 0.0, 0.0],
                 [0.0, 1.0, 1.0, 1.0],
                 [0.0, 0.0, 0.0, 0.0],
-            ]
+            ],
         )

         pd.testing.assert_series_equal(primitive_func(x), answers, check_names=False)
@@ -61,7 +61,7 @@ def test_nan(self):
                 [np.nan, 0.0, 0.0],
                 [np.nan, 0.0, 0.0],
                 [np.nan, 0.0, 0.0],
-            ]
+            ],
         )
         pd.testing.assert_series_equal(primitive_func(x), answers, check_names=False)

@@ -70,5 +70,9 @@ def test_with_featuretools(self, es):
         primitive_instance = self.primitive()
         transform.append(primitive_instance)
         valid_dfs(
-            es, aggregation, transform, self.primitive.name.upper(), multi_output=True
+            es,
+            aggregation,
+            transform,
+            self.primitive.name.upper(),
+            multi_output=True,
         )

nlp_primitives/tests/test_polarity_score.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ class TestPolarityScore(PrimitiveT):

     def test_primitive_func_1(self):
         array = pd.Series(
-            ["He hates cars!", "She loves everything", "This is neutral", "!12323"]
+            ["He hates cars!", "She loves everything", "This is neutral", "!12323"],
         )
         primitive_instance = self.primitive()
         primitive_func = primitive_instance.get_function()

nlp_primitives/tests/test_stopword_count.py

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ def test_strings(self):
                 "This is second string",
                 "third string",
                 "This IS the fourth string.",
-            ]
+            ],
         )
         primitive_func = self.primitive().get_function()
         answers = pd.Series([3, 2, 0, 3])

nlp_primitives/tests/test_universal_sentence_encoder.py

Lines changed: 7 additions & 4 deletions
@@ -21,7 +21,7 @@ def test_regular(universal_sentence_encoder):
             "The roller coaster was built in 1885.",
             "When will humans go to mars?",
             "Mitochondria is the powerhouse of the cell",
-        ]
+        ],
     )
     a = pd.DataFrame(universal_sentence_encoder(sentences))
     a = a.mean().round(7).to_numpy()
@@ -43,7 +43,10 @@ def mock_remove_tensorflow():


 def test_without_tensorflow(universal_sentence_encoder, mock_remove_tensorflow):
-    err_message = "In order to use the UniversalSentenceEncoder primitive install 'nlp_primitives[complete]'"
+    err_message = (
+        "In order to use the UniversalSentenceEncoder primitive install"
+        " 'nlp_primitives[complete]'"
+    )
     with pytest.raises(ImportError) as error:
         UniversalSentenceEncoder()
     assert error.value.args[0] == err_message
@@ -57,7 +60,7 @@ def test_primitive_serialization(universal_sentence_encoder):
             "The roller coaster was built in 1885.",
             "When will humans go to mars?",
             "Mitochondria is the powerhouse of the cell",
-        ]
+        ],
     )
     serialized_primitive = serialize_primitive(universal_sentence_encoder)
     deserializer = PrimitivesDeserializer()
@@ -77,7 +80,7 @@ def test_feature_serialization(universal_sentence_encoder, tmpdir):
             "The roller coaster was built in 1885.",
             "When will humans go to mars?",
             "Mitochondria is the powerhouse of the cell",
-        ]
+        ],
     )

     es = ft.EntitySet("es")

nlp_primitives/tests/test_utils.py

Lines changed: 10 additions & 7 deletions
@@ -107,10 +107,12 @@ def find_applicable_primitives(primitive):
     all_transform_primitives = list(get_transform_primitives().values())
     all_aggregation_primitives = list(get_aggregation_primitives().values())
     applicable_transforms = find_stackable_primitives(
-        all_transform_primitives, primitive
+        all_transform_primitives,
+        primitive,
     )
     applicable_aggregations = find_stackable_primitives(
-        all_aggregation_primitives, primitive
+        all_aggregation_primitives,
+        primitive,
     )
     return applicable_transforms, applicable_aggregations

@@ -153,13 +155,14 @@ def valid_dfs(
             applicable_features.append(feat)
     if len(applicable_features) == 0:
         raise ValueError(
-            "No feature names with %s, verify the name attribute \
-            is defined and/or generate_name() is defined to \
-            return %s "
-            % (feature_substrings, feature_substrings)
+            "No feature names with %s, verify the name attribute "
+            " is defined and/or generate_name() is defined to "
+            " return %s " % (feature_substrings, feature_substrings),
         )
     df = ft.calculate_feature_matrix(
-        entityset=es, features=applicable_features, instance_ids=instance_ids
+        entityset=es,
+        features=applicable_features,
+        instance_ids=instance_ids,
     )

     ft.encode_features(df, applicable_features)

nlp_primitives/utilities.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ def clean_tokens(text: str) -> List[str]:

     # Remove stopwords and punctuation
     stopwords_and_punctuation = set(nltk.corpus.stopwords.words("english")).union(
-        set(string.punctuation)
+        set(string.punctuation),
     )
     text = [word for word in text if word not in stopwords_and_punctuation]
