Skip to content

Commit c981749

Browse files
authored
Merge pull request #7 from IngoKl/dev-0.3.3
0.3.3
2 parents eb59a96 + 8a2b8fe commit c981749

28 files changed

+1613
-1255
lines changed

.travis.yml

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
1-
language: python
2-
python:
3-
- 3.7
4-
install:
5-
- pip install -U tox-travis
6-
- pip install -U spacy
7-
before_script:
8-
- python -m spacy download en_core_web_sm
9-
script:
10-
- tox
1+
language: python
2+
python:
3+
- 3.9
4+
install:
5+
- pip install -U tox-travis
6+
- pip install -U spacy
7+
before_script:
8+
- python -m spacy download en_core_web_sm
9+
script:
10+
- tox

HISTORY.rst

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,19 @@ History
78 78
* fixed some minor bugs
79 79
* added __str__ method to TextDirectory
80 80
* added filename to __str__ output
81-
* added `get_text` method
81+
* added `get_text` method
82+
83+
0.3.3 (2022-09-25)
84+
==================
85+
* added filter_by_filenames
86+
* added filter_by_filename_not_contains
87+
* added transform_to_files
88+
* added transformation_eebop4_to_plaintext
89+
* added transformation_replace_digits
90+
* added transformation_ftfy
91+
* added fast and skip_checkpoint options to load_files
92+
* added __repr__ method to TextDirectory
93+
* added examples
94+
* upgraded to spaCy 3
95+
* improved the test suite
96+
* fixed some minor bugs

LICENSE

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
1-
MIT License
2-
3-
Copyright (c) 2021, Ingo Kleiber
4-
5-
Permission is hereby granted, free of charge, to any person obtaining a copy
6-
of this software and associated documentation files (the "Software"), to deal
7-
in the Software without restriction, including without limitation the rights
8-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9-
copies of the Software, and to permit persons to whom the Software is
10-
furnished to do so, subject to the following conditions:
11-
12-
The above copyright notice and this permission notice shall be included in all
13-
copies or substantial portions of the Software.
14-
15-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21-
SOFTWARE.
22-
1+
MIT License
2+
3+
Copyright (c) 2022, Ingo Kleiber
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
22+

Makefile

Lines changed: 88 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,88 +1,88 @@
1-
.PHONY: clean clean-test clean-pyc clean-build docs help
2-
.DEFAULT_GOAL := help
3-
4-
define BROWSER_PYSCRIPT
5-
import os, webbrowser, sys
6-
7-
try:
8-
from urllib import pathname2url
9-
except:
10-
from urllib.request import pathname2url
11-
12-
webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
13-
endef
14-
export BROWSER_PYSCRIPT
15-
16-
define PRINT_HELP_PYSCRIPT
17-
import re, sys
18-
19-
for line in sys.stdin:
20-
match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
21-
if match:
22-
target, help = match.groups()
23-
print("%-20s %s" % (target, help))
24-
endef
25-
export PRINT_HELP_PYSCRIPT
26-
27-
BROWSER := python -c "$$BROWSER_PYSCRIPT"
28-
29-
help:
30-
@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
31-
32-
clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
33-
34-
clean-build: ## remove build artifacts
35-
rm -fr build/
36-
rm -fr dist/
37-
rm -fr .eggs/
38-
find . -name '*.egg-info' -exec rm -fr {} +
39-
find . -name '*.egg' -exec rm -f {} +
40-
41-
clean-pyc: ## remove Python file artifacts
42-
find . -name '*.pyc' -exec rm -f {} +
43-
find . -name '*.pyo' -exec rm -f {} +
44-
find . -name '*~' -exec rm -f {} +
45-
find . -name '__pycache__' -exec rm -fr {} +
46-
47-
clean-test: ## remove test and coverage artifacts
48-
rm -fr .tox/
49-
rm -f .coverage
50-
rm -fr htmlcov/
51-
rm -fr .pytest_cache
52-
53-
lint: ## check style with flake8
54-
flake8 textdirectory tests
55-
56-
test: ## run tests quickly with the default Python
57-
py.test
58-
59-
test-all: ## run tests on every Python version with tox
60-
tox
61-
62-
coverage: ## check code coverage quickly with the default Python
63-
coverage run --source textdirectory -m pytest
64-
coverage report -m
65-
coverage html
66-
$(BROWSER) htmlcov/index.html
67-
68-
docs: ## generate Sphinx HTML documentation, including API docs
69-
rm -f docs/textdirectory.rst
70-
rm -f docs/modules.rst
71-
sphinx-apidoc -o docs/ textdirectory
72-
$(MAKE) -C docs clean
73-
$(MAKE) -C docs html
74-
$(BROWSER) docs/_build/html/index.html
75-
76-
servedocs: docs ## compile the docs watching for changes
77-
watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
78-
79-
release: dist ## package and upload a release
80-
twine upload dist/*
81-
82-
dist: clean ## builds source and wheel package
83-
python setup.py sdist
84-
python setup.py bdist_wheel
85-
ls -l dist
86-
87-
install: clean ## install the package to the active Python's site-packages
88-
python setup.py install
1+
.PHONY: clean clean-test clean-pyc clean-build docs help
2+
.DEFAULT_GOAL := help
3+
4+
define BROWSER_PYSCRIPT
5+
import os, webbrowser, sys
6+
7+
try:
8+
from urllib import pathname2url
9+
except:
10+
from urllib.request import pathname2url
11+
12+
webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
13+
endef
14+
export BROWSER_PYSCRIPT
15+
16+
define PRINT_HELP_PYSCRIPT
17+
import re, sys
18+
19+
for line in sys.stdin:
20+
match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
21+
if match:
22+
target, help = match.groups()
23+
print("%-20s %s" % (target, help))
24+
endef
25+
export PRINT_HELP_PYSCRIPT
26+
27+
BROWSER := python -c "$$BROWSER_PYSCRIPT"
28+
29+
help:
30+
@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
31+
32+
clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
33+
34+
clean-build: ## remove build artifacts
35+
rm -fr build/
36+
rm -fr dist/
37+
rm -fr .eggs/
38+
find . -name '*.egg-info' -exec rm -fr {} +
39+
find . -name '*.egg' -exec rm -f {} +
40+
41+
clean-pyc: ## remove Python file artifacts
42+
find . -name '*.pyc' -exec rm -f {} +
43+
find . -name '*.pyo' -exec rm -f {} +
44+
find . -name '*~' -exec rm -f {} +
45+
find . -name '__pycache__' -exec rm -fr {} +
46+
47+
clean-test: ## remove test and coverage artifacts
48+
rm -fr .tox/
49+
rm -f .coverage
50+
rm -fr htmlcov/
51+
rm -fr .pytest_cache
52+
53+
lint: ## check style with flake8
54+
flake8 textdirectory tests
55+
56+
test: ## run tests quickly
57+
python setup.py test
58+
59+
test-all: ## run tests on every Python version with tox
60+
tox
61+
62+
coverage: ## check code coverage quickly with the default Python
63+
coverage run --source textdirectory -m pytest
64+
coverage report -m
65+
coverage html
66+
$(BROWSER) htmlcov/index.html
67+
68+
docs: ## generate Sphinx HTML documentation, including API docs
69+
rm -f docs/textdirectory.rst
70+
rm -f docs/modules.rst
71+
sphinx-apidoc -o docs/ textdirectory
72+
$(MAKE) -C docs clean
73+
$(MAKE) -C docs html
74+
$(BROWSER) docs/_build/html/index.html
75+
76+
servedocs: docs ## compile the docs watching for changes
77+
watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
78+
79+
release: dist ## package and upload a release
80+
twine upload dist/*
81+
82+
dist: clean ## builds source and wheel package
83+
python setup.py sdist
84+
python setup.py bdist_wheel
85+
ls -l dist
86+
87+
install: clean ## install the package to the active Python's site-packages
88+
python setup.py install

0 commit comments

Comments (0)