From 3abf045f1e5823b95ebbdd04a2d88c88110204f8 Mon Sep 17 00:00:00 2001 From: Bell Eapen Date: Sat, 2 Nov 2024 21:38:13 -0500 Subject: [PATCH 1/6] Merge branch 'spacy-update-1' --- .coveragerc | 7 +- .github/workflows/docs.yml | 2 +- .github/workflows/pr.yml | 4 +- .github/workflows/publish.yml | 6 +- .github/workflows/tox.yml | 10 +- .readthedocs.yml | 7 +- dev-requirements.txt | 166 +++++++++---------- notes/pip-tools.md | 7 +- pyproject.toml | 5 +- requirements.txt | 297 +++++++++++++++++++--------------- setup.cfg | 85 ++++++---- setup.py | 3 +- src/qrmine/content.py | 5 +- src/qrmine/mlqrmine.py | 2 +- src/qrmine/network.py | 10 +- src/qrmine/nlp_qrmine.py | 42 ++--- tests/conftest.py | 6 +- tests/test_nlp.py | 4 + 18 files changed, 372 insertions(+), 296 deletions(-) diff --git a/.coveragerc b/.coveragerc index 224e586..14f9abe 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,9 +1,14 @@ # .coveragerc to control coverage.py [run] branch = True -source = */nlp_qrmine/* +source = qrmine # omit = bad_file.py +[paths] +source = + src/ + */site-packages/ + [report] # Regexes for lines to exclude from consideration exclude_lines = diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f3b6a2e..5a5f30a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.7' + python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 39112e1..f742724 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -13,8 +13,8 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.7] - os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.11"] + os: [ubuntu-latest, macos-13, windows-latest] runs-on: ${{ matrix.os }} timeout-minutes: 20 steps: diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 5615dac..7da783a 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -12,9 +12,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5.1.1 with: - python-version: '3.7' + python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip @@ -24,7 +24,7 @@ jobs: python setup.py bdist_wheel - name: Publish distribution 📦 to PyPI if: startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@master with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 39ff65a..2b436ed 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -1,4 +1,4 @@ -name: Tox on release +name: Tox Test on: push: @@ -9,22 +9,22 @@ jobs: build: runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 20 strategy: max-parallel: 4 matrix: - python-version: [3.7] + python-version: ["3.11"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5.1.1 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt -r dev-requirements.txt + pip install -r dev-requirements.txt -r requirements.txt python -m spacy download en_core_web_sm - name: Test with tox run: | diff --git a/.readthedocs.yml b/.readthedocs.yml index 1133df0..a2bcab3 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -16,7 +16,12 @@ sphinx: formats: - pdf +build: + os: ubuntu-22.04 + tools: + python: "3.11" + python: - version: 3.8 install: - requirements: docs/requirements.txt + - {path: ., method: pip} diff --git a/dev-requirements.txt b/dev-requirements.txt index e96e708..b7fa3e6 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,161 +1,155 @@ # -# This file is autogenerated by pip-compile with python 3.7 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: # # pip-compile dev-requirements.in # -alabaster==0.7.12 +alabaster==1.0.0 # via sphinx -attrs==21.4.0 - # via pytest -babel==2.9.1 +babel==2.16.0 # via sphinx -certifi==2021.10.8 +build==1.2.2.post1 + # via pip-tools +cachetools==5.5.0 # via - # -c requirements.txt + # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # tox +certifi==2024.8.30 + # via + # -c /home/beapen/repos/nlp-qrmine/requirements.txt # requests -charset-normalizer==2.0.12 +chardet==5.2.0 + # via tox +charset-normalizer==3.4.0 # via - # -c requirements.txt + # -c /home/beapen/repos/nlp-qrmine/requirements.txt # requests -click==8.1.2 +click==8.1.7 # via - # -c requirements.txt + # -c /home/beapen/repos/nlp-qrmine/requirements.txt # pip-tools +colorama==0.4.6 + # via tox commonmark==0.9.1 # via recommonmark -coverage[toml]==6.3.2 +coverage[toml]==7.6.4 # via pytest-cov -distlib==0.3.4 +distlib==0.3.9 # via virtualenv -docutils==0.17.1 +docutils==0.21.2 # via # recommonmark # sphinx -filelock==3.6.0 +exceptiongroup==1.2.2 + # via pytest +filelock==3.16.1 # via # tox # virtualenv -idna==3.3 +idna==3.10 # via - # -c requirements.txt + # -c /home/beapen/repos/nlp-qrmine/requirements.txt # requests -imagesize==1.3.0 +imagesize==1.4.1 # via sphinx -importlib-metadata==4.11.3 ; python_version < "3.8" +iniconfig==2.0.0 + # via pytest +jinja2==3.1.4 # via - # -c requirements.txt - # click - # pep517 - # pluggy - # pytest + # -c /home/beapen/repos/nlp-qrmine/requirements.txt # sphinx - # tox - # virtualenv -iniconfig==1.1.1 - # via pytest -jinja2==3.1.1 - # via sphinx -markupsafe==2.1.1 - # via jinja2 -packaging==21.3 +markupsafe==3.0.2 # via - # -c requirements.txt + # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # jinja2 +packaging==24.1 + # via + # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # build + # pyproject-api # pytest # setuptools-scm # sphinx # tox -pep517==0.12.0 - # via pip-tools -pip-tools==6.6.0 +pip-tools==7.4.1 # via -r dev-requirements.in -platformdirs==2.5.1 - # via virtualenv -pluggy==1.0.0 +platformdirs==4.3.6 # via - # pytest # tox -py==1.11.0 + # virtualenv +pluggy==1.5.0 # via # pytest # tox -pygments==2.11.2 - # via sphinx -pyparsing==3.0.7 +pygments==2.18.0 + # via + # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # sphinx +pyproject-api==1.8.0 + # via tox +pyproject-hooks==1.2.0 # via - # -c requirements.txt - # packaging -pytest==7.1.2 + # build + # pip-tools +pytest==8.3.3 # via # -r dev-requirements.in # pytest-cov -pytest-cov==3.0.0 +pytest-cov==6.0.0 # via -r dev-requirements.in -pytz==2022.1 - # via - # -c requirements.txt - # babel recommonmark==0.7.1 # via -r dev-requirements.in -requests==2.27.1 +requests==2.32.3 # via - # -c requirements.txt + # -c /home/beapen/repos/nlp-qrmine/requirements.txt # sphinx -setuptools-scm==6.4.2 +setuptools-scm==8.1.0 # via -r dev-requirements.in -six==1.16.0 - # via - # -c requirements.txt - # tox - # virtualenv snowballstemmer==2.2.0 # via sphinx -sphinx==4.5.0 +sphinx==8.1.3 # via # -r dev-requirements.in # recommonmark -sphinxcontrib-applehelp==1.0.2 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-htmlhelp==2.1.0 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx -toml==0.10.2 - # via tox -tomli==2.0.1 +tomli==2.0.2 # via + # build # coverage - # pep517 + # pip-tools + # pyproject-api # pytest # setuptools-scm -tox==3.24.5 + # sphinx + # tox +tox==4.23.2 # via -r dev-requirements.in -typing-extensions==4.1.1 +typing-extensions==4.12.2 # via - # -c requirements.txt - # importlib-metadata -urllib3==1.26.9 + # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # tox +urllib3==2.2.3 # via - # -c requirements.txt + # -c /home/beapen/repos/nlp-qrmine/requirements.txt # requests -virtualenv==20.14.0 +virtualenv==20.27.1 # via tox -wheel==0.37.1 +wheel==0.44.0 # via - # -c requirements.txt + # -c /home/beapen/repos/nlp-qrmine/requirements.txt # -r dev-requirements.in # pip-tools -zipp==3.8.0 - # via - # -c requirements.txt - # importlib-metadata - # pep517 # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/notes/pip-tools.md b/notes/pip-tools.md index d656035..2bbbecb 100644 --- a/notes/pip-tools.md +++ b/notes/pip-tools.md @@ -15,4 +15,9 @@ OR * pip install pre-commit -* pre-commit install \ No newline at end of file +* pre-commit install + +## uv + +* pip install uv +* uv pip compile setup.cfg -o requirements.txt --universal \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 2c63dbb..89a5bed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,9 @@ [build-system] # AVOID CHANGING REQUIRES: IT WILL BE UPDATED BY PYSCAFFOLD! -requires = ["setuptools>=46.1.0", "setuptools_scm[toml]>=5", "wheel"] +requires = ["setuptools>=46.1.0", "setuptools_scm[toml]>=5"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] -# See configuration details in https://github.com/pypa/setuptools_scm +# For smarter version schemes and other configuration options, +# check out https://github.com/pypa/setuptools_scm version_scheme = "no-guess-dev" diff --git a/requirements.txt b/requirements.txt index debaaba..1fe7438 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,116 +1,134 @@ -# -# This file is autogenerated by pip-compile with python 3.7 -# To update, run: -# -# pip-compile -# -absl-py==1.0.0 +# This file was autogenerated by uv via the following command: +# uv pip compile setup.cfg -o requirements.txt --universal +absl-py==2.1.0 # via # tensorboard # tensorflow astunparse==1.6.3 # via tensorflow -blis==0.7.7 - # via - # spacy - # thinc -cached-property==1.5.2 - # via h5py -cachetools==5.0.0 +blis==0.7.11 + # via thinc +cachetools==5.5.0 # via # google-auth # textacy -catalogue==1.0.0 +catalogue==2.0.10 # via # spacy + # srsly + # textacy # thinc -certifi==2023.7.22 +certifi==2024.8.30 # via requests -charset-normalizer==2.0.12 +charset-normalizer==3.4.0 # via requests -click==8.1.2 - # via qrmine (setup.py) -cycler==0.11.0 +click==8.1.7 + # via + # qrmine (setup.cfg) + # typer +cloudpathlib==0.20.0 + # via weasel +colorama==0.4.6 ; sys_platform == 'win32' or platform_system == 'Windows' + # via + # click + # tqdm + # wasabi +confection==0.1.5 + # via + # thinc + # weasel +contourpy==1.3.0 + # via matplotlib +cycler==0.12.1 # via matplotlib -cymem==2.0.6 +cymem==2.0.8 # via # preshed # spacy # thinc -cytoolz==0.11.2 +cytoolz==1.0.0 # via textacy -flatbuffers==2.0 +flatbuffers==24.3.25 # via tensorflow -fonttools==4.31.2 +floret==0.10.5 + # via textacy +fonttools==4.54.1 # via matplotlib -gast==0.5.3 +gast==0.4.0 # via tensorflow -google-auth==2.6.2 +google-auth==2.35.0 # via # google-auth-oauthlib # tensorboard -google-auth-oauthlib==0.4.6 +google-auth-oauthlib==1.0.0 # via tensorboard google-pasta==0.2.0 # via tensorflow -grpcio==1.44.0 +grpcio==1.67.1 # via # tensorboard # tensorflow -h5py==3.6.0 +h5py==3.12.1 # via tensorflow -idna==3.3 +idna==3.10 # via requests -imbalanced-learn==0.9.0 - # via qrmine (setup.py) -importlib-metadata==4.11.3 ; python_version < "3.8" - # via - # catalogue - # click - # markdown - # qrmine (setup.py) -jellyfish==0.9.0 +imbalanced-learn==0.12.4 + # via qrmine (setup.cfg) +jellyfish==1.1.0 # via textacy -joblib==1.2.0 +jinja2==3.1.4 + # via spacy +joblib==1.4.2 # via # imbalanced-learn # mlxtend # scikit-learn # textacy -keras==2.8.0 - # via tensorflow -keras-preprocessing==1.1.2 +keras==2.13.1 # via tensorflow -kiwisolver==1.4.2 +kiwisolver==1.4.7 # via matplotlib -libclang==13.0.0 +langcodes==3.4.1 + # via spacy +language-data==1.2.0 + # via langcodes +libclang==18.1.1 # via tensorflow -markdown==3.3.6 +marisa-trie==1.2.1 + # via language-data +markdown==3.7 # via tensorboard -matplotlib==3.5.1 +markdown-it-py==3.0.0 + # via rich +markupsafe==3.0.2 + # via + # jinja2 + # werkzeug +matplotlib==3.9.2 # via + # qrmine (setup.cfg) # mlxtend - # qrmine (setup.py) -mlxtend==0.19.0 - # via qrmine (setup.py) -murmurhash==1.0.6 +mdurl==0.1.2 + # via markdown-it-py +mlxtend==0.23.1 + # via qrmine (setup.cfg) +murmurhash==1.0.10 # via # preshed # spacy # thinc -networkx==2.6.3 +networkx==3.4.2 # via textacy -numpy==1.21.5 +numpy==1.24.3 # via # blis + # contourpy + # floret # h5py # imbalanced-learn - # keras-preprocessing # matplotlib # mlxtend - # opt-einsum # pandas - # pyemd # scikit-learn # scipy # spacy @@ -119,147 +137,174 @@ numpy==1.21.5 # textacy # thinc # xgboost +nvidia-nccl-cu12==2.23.4 ; platform_machine != 'aarch64' and platform_system == 'Linux' + # via xgboost oauthlib==3.2.2 # via requests-oauthlib -opt-einsum==3.3.0 +opt-einsum==3.4.0 # via tensorflow -packaging==21.3 - # via matplotlib -pandas==1.3.5 - # via - # mlxtend - # qrmine (setup.py) -pillow==9.3.0 - # via matplotlib -plac==1.1.3 +packaging==24.1 # via + # matplotlib # spacy + # tensorflow # thinc -preshed==3.0.6 + # weasel +pandas==2.1.0 + # via + # qrmine (setup.cfg) + # mlxtend +pillow==11.0.0 + # via matplotlib +preshed==3.0.9 # via # spacy # thinc -protobuf==3.20.0 +protobuf==4.25.5 # via # tensorboard # tensorflow -pyasn1==0.4.8 +pyasn1==0.6.1 # via # pyasn1-modules # rsa -pyasn1-modules==0.2.8 +pyasn1-modules==0.4.1 # via google-auth -pyemd==0.5.1 - # via textacy -pyparsing==3.0.7 +pydantic==1.10.18 # via - # matplotlib - # packaging -pyphen==0.12.0 + # confection + # spacy + # thinc + # weasel +pygments==2.18.0 + # via rich +pyparsing==3.2.0 + # via matplotlib +pyphen==0.17.0 # via textacy -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via # matplotlib # pandas -pytz==2022.1 +pytz==2024.2 # via pandas -requests==2.31.0 +requests==2.32.3 # via # requests-oauthlib # spacy # tensorboard # textacy # vadersentiment -requests-oauthlib==1.3.1 + # weasel +requests-oauthlib==2.0.0 # via google-auth-oauthlib -rsa==4.8 +rich==13.9.4 + # via typer +rsa==4.9 # via google-auth -scikit-learn==1.0.2 +scikit-learn==1.5.2 # via + # qrmine (setup.cfg) # imbalanced-learn # mlxtend - # qrmine (setup.py) # textacy -scipy==1.7.3 +scipy==1.14.1 # via # imbalanced-learn # mlxtend # scikit-learn # textacy # xgboost +setuptools==75.3.0 + # via + # marisa-trie + # spacy + # tensorboard + # tensorflow + # thinc +shellingham==1.5.4 + # via typer six==1.16.0 # via - # absl-py # astunparse - # google-auth # google-pasta - # grpcio - # keras-preprocessing # python-dateutil # tensorflow -spacy==2.3.7 +smart-open==7.0.5 + # via weasel +spacy==3.7.5 # via - # qrmine (setup.py) + # qrmine (setup.cfg) # textacy -srsly==1.0.5 +spacy-legacy==3.0.12 + # via spacy +spacy-loggers==1.0.5 + # via spacy +srsly==2.4.8 # via + # confection # spacy - # textacy # thinc -tensorboard==2.8.0 + # weasel +tensorboard==2.13.0 # via tensorflow -tensorboard-data-server==0.6.1 - # via tensorboard -tensorboard-plugin-wit==1.8.1 +tensorboard-data-server==0.7.2 # via tensorboard -tensorflow==2.8.0 - # via qrmine (setup.py) -tensorflow-io-gcs-filesystem==0.24.0 +tensorflow==2.13.1 + # via qrmine (setup.cfg) +tensorflow-estimator==2.13.0 # via tensorflow -termcolor==1.1.0 - # via tensorflow -textacy==0.10.0 - # via qrmine (setup.py) -tf-estimator-nightly==2.8.0.dev2021122109 +tensorflow-io-gcs-filesystem==0.31.0 + # via + # qrmine (setup.cfg) + # tensorflow +termcolor==2.5.0 # via tensorflow -thinc==7.4.5 +textacy==0.13.0 + # via qrmine (setup.cfg) +thinc==8.2.5 # via spacy -threadpoolctl==3.1.0 +threadpoolctl==3.5.0 # via # imbalanced-learn # scikit-learn -toolz==0.11.2 +toolz==1.0.0 # via cytoolz -tqdm==4.64.0 +tqdm==4.66.6 # via # spacy # textacy - # thinc -typing-extensions==4.1.1 +typer==0.12.5 # via - # importlib-metadata - # kiwisolver + # spacy + # weasel +typing-extensions==4.5.0 + # via + # pydantic # tensorflow -urllib3==1.26.9 + # typer +tzdata==2024.2 + # via pandas +urllib3==2.2.3 # via requests vadersentiment==3.3.2 - # via qrmine (setup.py) -wasabi==0.9.1 + # via qrmine (setup.cfg) +wasabi==1.1.3 # via # spacy # thinc -werkzeug==2.1.1 + # weasel +weasel==0.4.1 + # via spacy +werkzeug==3.1.1 # via tensorboard -wheel==0.37.1 +wheel==0.44.0 # via # astunparse # tensorboard -wrapt==1.14.0 - # via tensorflow -xgboost==1.5.2 - # via qrmine (setup.py) -zipp==3.8.0 - # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# setuptools +wrapt==1.14.1 + # via + # smart-open + # tensorflow +xgboost==2.1.2 + # via qrmine (setup.cfg) diff --git a/setup.cfg b/setup.cfg index 69abafb..f3c1207 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,42 +1,57 @@ # This file is used to configure your project. # Read more about the various options under: -# http://setuptools.readthedocs.io/en/latest/setuptools.html#configuring-setup-using-setup-cfg-files +# https://setuptools.pypa.io/en/latest/userguide/declarative_config.html +# https://setuptools.pypa.io/en/latest/references/keywords.html [metadata] name = qrmine description = Qualitative Research support tools in Python! author = beapen author_email = github@gulfdoctor.net -license = gpl3 -url = https://github.com/dermatologist/nlp-qrmine +license = GPL-3.0-only +# license_files = LICENSE.txt +# long_description = file: README.rst +# long_description_content_type = text/x-rst; charset=UTF-8 long_description = file: README.md long_description_content_type = text/markdown +url = https://github.com/dermatologist/nlp-qrmine +# Add here related links, for example: +project_urls = + Documentation = https://arxiv.org/abs/2003.13519 +# Source = https://github.com/pyscaffold/pyscaffold/ +# Changelog = https://pyscaffold.org/en/latest/changelog.html +# Tracker = https://github.com/pyscaffold/pyscaffold/issues +# Conda-Forge = https://anaconda.org/conda-forge/pyscaffold +# Download = https://pypi.org/project/PyScaffold/#files +# Twitter = https://twitter.com/PyScaffold + # Change if running only on Windows, Mac or Linux (comma-separated) platforms = any + # Add here all kinds of additional classifiers as defined under -# https://pypi.python.org/pypi?%3Aaction=list_classifiers +# https://pypi.org/classifiers/ classifiers = Intended Audience :: Science/Research Development Status :: 4 - Beta Operating System :: OS Independent - Programming Language :: Python - Programming Language :: Python :: 3.5 - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.11 Topic :: Scientific/Engineering :: Information Analysis + [options] zip_safe = False packages = find_namespace: include_package_data = True package_dir = =src -# Add here dependencies of your project (semicolon-separated), e.g. -# install_requires = numpy; scipy -# install_requires = numpy; pandas; matplotlib; imbalanced-learn==0.4.3; scikit-learn==0.20.4; xgboost; mlxtend; Keras; keras-text; click; vaderSentiment; spacy; textacy; tensorflow>=1.13.1 -# Format +# Require a min/specific Python version (comma-separated conditions) +# python_requires = >=3.8 +# Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0. +# Version specifiers like >=2.2,<3.0 avoid problems due to API changes in +# new major versions. This works if the required packages follow Semantic Versioning. +# For more information, check out https://semver.org/. install_requires = importlib-metadata; python_version<"3.8" pandas @@ -47,18 +62,10 @@ install_requires = vaderSentiment xgboost mlxtend - spacy==2.3.7 - textacy==0.10.0 - tensorflow - # below is not supported by pypi - # en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz#egg=en_core_web_sm - -# install_requires=pyemd; numpy; pandas; matplotlib; click; scikit-learn==0.20.4; imbalanced-learn; vaderSentiment; xgboost; mlxtend; spacy>=2.2.0,<3.0.0; textacy==0.8.0; tensorflow - - - -# Add here test requirements (semicolon-separated) -tests_require = pytest; pytest-cov + spacy + textacy + tensorflow==2.13.1 + tensorflow-io-gcs-filesystem==0.31.0 [options.packages.find] where = src @@ -70,6 +77,12 @@ exclude = # `pip install qrmine[PDF]` like: # PDF = ReportLab; RXP +# Add here test requirements (semicolon/line-separated) +testing = + setuptools + pytest + pytest-cov + [options.entry_points] # Add here console scripts like: # console_scripts = @@ -80,19 +93,15 @@ exclude = # And any other entry points, for example: # pyscaffold.cli = # awesome = pyscaffoldext.awesome.extension:AwesomeExtension - console_scripts = qrmine = qrmine.main:main_routine -[test] -# py.test options when running `python setup.py test` -addopts = tests - [tool:pytest] -# Options for py.test: -# Specify command line options as you would do when invoking py.test directly. +# Specify command line options as you would do when invoking pytest directly. # e.g. --cov-report html (or xml) for html/xml output or --junitxml junit.xml # in order to write a coverage file that can be read by Jenkins. +# CAUTION: --cov flags may prohibit setting breakpoints while debugging. +# Comment those flags to avoid this pytest issue. addopts = --verbose norecursedirs = @@ -111,14 +120,24 @@ universal = 1 source_dir = docs build_dir = docs/_build +testpaths = tests +# Use pytest markers to select/deselect specific tests +# markers = +# slow: mark tests as slow (deselect with '-m "not slow"') +# system: mark end-to-end system tests + [devpi:upload] # Options for the devpi: PyPI server and packaging tool # VCS export must be deactivated since we are using setuptools-scm -no-vcs = 1 +no_vcs = 1 formats = bdist_wheel [flake8] # Some sane defaults for the code style checker flake8 +max_line_length = 88 +extend_ignore = E203, W503 +# ^ Black-compatible +# E203 and W503 have edge cases handled by black exclude = .tox build @@ -129,5 +148,5 @@ exclude = [pyscaffold] # PyScaffold's parameters when the project was created. # This will be used when updating. Do not change! -version = 4.0.2 +version = 4.6 package = qrmine diff --git a/setup.py b/setup.py index 8a2ef86..0f264c5 100644 --- a/setup.py +++ b/setup.py @@ -2,10 +2,11 @@ Setup file for qrmine. Use setup.cfg to configure your project. - This file was generated with PyScaffold 4.0.2. + This file was generated with PyScaffold 4.6. PyScaffold helps you to put up the scaffold of your new Python project. Learn more under: https://pyscaffold.org/ """ + from setuptools import setup if __name__ == "__main__": diff --git a/src/qrmine/content.py b/src/qrmine/content.py index 576a246..3344a80 100644 --- a/src/qrmine/content.py +++ b/src/qrmine/content.py @@ -19,13 +19,14 @@ import operator -import en_core_web_sm +# import en_core_web_sm +import textacy class Content(object): def __init__(self, content): self._content = content - self._nlp = en_core_web_sm.load() + self._nlp = textacy.load_spacy_lang("en_core_web_sm") self._processed = self._nlp(self._content) self._lemma = {} self._pos = {} diff --git a/src/qrmine/mlqrmine.py b/src/qrmine/mlqrmine.py index 68ee457..12b75a3 100644 --- a/src/qrmine/mlqrmine.py +++ b/src/qrmine/mlqrmine.py @@ -100,7 +100,7 @@ def read_csv(self): def mark_missing(self): self._dataset_original = self._dataset - self._dataset = self._dataset.replace('', numpy.NaN) + self._dataset = self._dataset.replace('', numpy.nan) self._dataset.dropna(inplace=True) def restore_mark_missing(self): diff --git a/src/qrmine/network.py b/src/qrmine/network.py index 7f4f2f2..c012637 100644 --- a/src/qrmine/network.py +++ b/src/qrmine/network.py @@ -1,6 +1,4 @@ -import textacy.network - - +import textacy.viz.network as network class Network(object): def __init__(self): @@ -10,15 +8,15 @@ def __init__(self): self._axis = None def sents_to_network(self, sents): - self._graph = textacy.network.sents_to_semantic_network(sents, normalize='lemma', edge_weighting='cosine') + self._graph = network.sents_to_semantic_network(sents, normalize='lemma', edge_weighting='cosine') return self._graph def terms_to_network(self, terms): - self._graph = textacy.network.terms_to_semantic_network(terms, normalize='lemma', edge_weighting='cosine') + self._graph = network.terms_to_semantic_network(terms, normalize='lemma', edge_weighting='cosine') return self._graph def draw_graph(self, draw=False): - self._axis = textacy.viz.network.draw_semantic_network(self._graph, node_weights=None, spread=3.0, + self._axis = network.draw_semantic_network(self._graph, node_weights=None, spread=3.0, draw_nodes=draw, base_node_size=300, node_alpha=0.25, line_width=0.5, line_alpha=0.1, diff --git a/src/qrmine/nlp_qrmine.py b/src/qrmine/nlp_qrmine.py index 2e131b9..44209c9 100644 --- a/src/qrmine/nlp_qrmine.py +++ b/src/qrmine/nlp_qrmine.py @@ -1,6 +1,6 @@ import subprocess import textacy -from textacy.vsm.vectorizers import Vectorizer +from textacy.representations.vectorizers import Vectorizer import textacy.tm from textacy import preprocessing @@ -15,7 +15,7 @@ def __init__(self): self._min_occurrence_for_topic = 2 self._common_verbs = 10 # create an empty corpus - self._en = textacy.load_spacy_lang('en_core_web_sm', disable=('parser',)) + self._en = textacy.load_spacy_lang('en_core_web_sm') self._corpus = textacy.Corpus(lang=self._en) self._content = None self._model = None @@ -24,8 +24,8 @@ def __init__(self): self._terms = None self._doc_term_matrix = None self._doc_topic_matrix = None - self._vectorizer = Vectorizer(tf_type='linear', apply_idf=True, idf_type='smooth', - norm='l2', min_df=3, max_df=0.95, max_n_terms=100000) + self._vectorizer = Vectorizer(tf_type='linear', idf_type='smooth', + norm='l2', min_df=2, max_df=0.95, max_n_terms=100000) @property def content(self): @@ -58,9 +58,9 @@ def get_git_revision_short_hash(self): # return subprocess.check_output(['git', 'log', '-1', '--format=%cd']).strip().decode("utf-8")[10:] def print_categories(self, doc, num=10): - bot = doc._.to_bag_of_terms(ngrams=(1, 2, 3), named_entities=False, normalize='lemma', weighting='freq', - as_strings=True, filter_stops=True, filter_punct=True, filter_nums=True, min_freq=2, - drop_determiners=True, include_types=["NOUN", "VERB"]) + textacy.spacier.extensions.set_doc_extensions("extract.bags") + bot = doc._.to_bag_of_terms(by='lemma_', weighting='freq', + ngs=(1,2,3), ents=True, ncs=True, dedupe=True) categories = sorted(bot.items(), key=lambda x: x[1], reverse=True)[:num] output = [] to_return = [] @@ -86,7 +86,7 @@ def category_basket(self, num=10): for index, title in enumerate(self._content.titles): # QRMines content should be set content = self._content.documents[index] this_record = Content(content) - doc = textacy.make_spacy_doc(this_record.doc) + doc = textacy.make_spacy_doc(this_record.doc, lang=self._en) item_basket.append(self.print_categories(doc, num)) return item_basket # Example return: @@ -199,7 +199,8 @@ def process_content(self): # 2-Jan-2020 textacy new version, breaking change # replace numbers with NUM, remove punct and convert to lower case - doc_text = preprocessing.replace.replace_numbers(preprocessing.remove.remove_punctuation(document), 'NUM').lower() + # doc_text = preprocessing.replace.replace_numbers(preprocessing.remove.remove_punctuation(document), 'NUM').lower() + doc_text = preprocessing.replace.numbers(preprocessing.remove.punctuation(document)).lower() doc = textacy.make_spacy_doc((doc_text, metadata), lang=self._en) self._corpus.add_doc(doc) @@ -216,8 +217,8 @@ def filter_content(self, titles): # textacy.preprocess_text(document, lowercase=True, no_punct=True, no_numbers=True), # metadata=metadata) #doc_text = textacy.preprocess_text(document, lowercase=True, no_punct=True, no_numbers=True) - doc_text = preprocessing.replace.replace_numbers(preprocessing.remove.remove_punctuation(document), 'NUM').lower() - + # doc_text = preprocessing.replace.replace_numbers(preprocessing.remove.remove_punctuation(document), 'NUM').lower() + doc_text = preprocessing.replace.numbers(preprocessing.remove.punctuation(document)).lower() doc = textacy.make_spacy_doc((doc_text, metadata), lang=self._en) self._corpus.add_doc(doc) @@ -226,16 +227,15 @@ def filter_content(self, titles): self.load_matrix() def load_matrix(self): - self._doc_term_matrix = self._vectorizer.fit_transform( - (documents._.to_terms_list(ngrams=(1, 2, 3), named_entities=True, - as_strings=True, filter_stops=True, - filter_punct=True, filter_nums=True, - min_freq=2) - for documents in self._corpus.docs)) + textacy.spacier.extensions.set_doc_extensions("extract.keyterms") + terms = ((term.text for term in textacy.extract.terms(doc, ngs=1, ents=True))for doc in self._corpus.docs) + self._doc_term_matrix = self._vectorizer.fit_transform(terms) self._numdocs, self._terms = self._doc_term_matrix.shape - self._model = textacy.tm.TopicModel('nmf', n_topics=self._numdocs) + self._model = textacy.tm.TopicModel('lda', n_topics=self._numdocs) self._model.fit(self._doc_term_matrix) + try: + self._doc_topic_matrix = self._model.transform(self._doc_term_matrix) - self._doc_topic_matrix = self._model.transform(self._doc_term_matrix) - - _, self._numtopics = self._doc_topic_matrix.shape + _, self._numtopics = self._doc_topic_matrix.shape + except ValueError: + print("No topics found") diff --git a/tests/conftest.py b/tests/conftest.py index 0b879b1..a08aa23 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,12 +1,10 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- """ Dummy conftest.py for qrmine. If you don't know what this is for, just leave it empty. Read more about conftest.py under: - https://pytest.org/latest/plugins.html + - https://docs.pytest.org/en/stable/fixture.html + - https://docs.pytest.org/en/stable/writing_plugins.html """ -from __future__ import print_function, absolute_import, division # import pytest diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 1589d48..4ad331d 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -25,6 +25,7 @@ def test_generate_dict(corpus_fixture, capsys, q): all_interviews = Content(corpus_fixture.content) q.print_dict(all_interviews, num) captured = capsys.readouterr() + print(captured.out) assert 'code' in captured.out def test_generate_topics(corpus_fixture, capsys, q): @@ -32,18 +33,21 @@ def test_generate_topics(corpus_fixture, capsys, q): q.process_content() q.print_topics() captured = capsys.readouterr() + print(captured.out) assert 'TOPIC' in captured.out def test_category_basket(corpus_fixture, capsys, q): q.content = corpus_fixture print(q.category_basket()) captured = capsys.readouterr() + print(captured.out) assert 'theory' in captured.out def test_category_association(corpus_fixture, capsys, q): q.content = corpus_fixture print(q.category_association()) captured = capsys.readouterr() + print(captured.out) assert 'theory' in captured.out From 1f1394d373d7a285c4c3ba1a1cb7449b2bfcfb3d Mon Sep 17 00:00:00 2001 From: Bell Eapen Date: Thu, 7 Nov 2024 19:55:56 -0600 Subject: [PATCH 2/6] docs: update installation instructions in README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 35f6591..62e9d85 100644 --- a/README.md +++ b/README.md @@ -30,9 +30,10 @@ QRMine is a suite of qualitative research (QR) data mining tools in Python using ## How to install +* Requires Python 3.11 and a CPU that support AVX instructions ```text - -pip install qrmine +pip install uv +uv pip install qrmine python -m spacy download en_core_web_sm ``` From a12a103d642571d0046048484aa29ea3bdd62db7 Mon Sep 17 00:00:00 2001 From: Bell Eapen Date: Sun, 10 Nov 2024 13:24:35 -0600 Subject: [PATCH 3/6] chore: update development requirements and commands in configuration files --- dev-requirements.txt | 70 +++++++++++++++++++------------------------- notes/pip-tools.md | 3 +- tox.ini | 2 +- 3 files changed, 33 insertions(+), 42 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index b7fa3e6..5c0eb4c 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile dev-requirements.in -# +# This file was autogenerated by uv via the following command: +# uv pip compile dev-requirements.in -o dev-requirements.txt --universal alabaster==1.0.0 # via sphinx babel==2.16.0 @@ -12,27 +8,33 @@ build==1.2.2.post1 # via pip-tools cachetools==5.5.0 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # tox certifi==2024.8.30 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # requests chardet==5.2.0 # via tox charset-normalizer==3.4.0 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # requests click==8.1.7 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # pip-tools colorama==0.4.6 - # via tox + # via + # -c requirements.txt + # build + # click + # pytest + # sphinx + # tox commonmark==0.9.1 # via recommonmark -coverage[toml]==7.6.4 +coverage==7.6.4 # via pytest-cov distlib==0.3.9 # via virtualenv @@ -40,15 +42,13 @@ docutils==0.21.2 # via # recommonmark # sphinx -exceptiongroup==1.2.2 - # via pytest filelock==3.16.1 # via # tox # virtualenv idna==3.10 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # requests imagesize==1.4.1 # via sphinx @@ -56,21 +56,23 @@ iniconfig==2.0.0 # via pytest jinja2==3.1.4 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # sphinx markupsafe==3.0.2 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # jinja2 packaging==24.1 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # build # pyproject-api # pytest # setuptools-scm # sphinx # tox +pip==24.3.1 + # via pip-tools pip-tools==7.4.1 # via -r dev-requirements.in platformdirs==4.3.6 @@ -83,7 +85,7 @@ pluggy==1.5.0 # tox pygments==2.18.0 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # sphinx pyproject-api==1.8.0 # via tox @@ -101,8 +103,14 @@ recommonmark==0.7.1 # via -r dev-requirements.in requests==2.32.3 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # sphinx +setuptools==75.3.0 + # via + # -c requirements.txt + # -r dev-requirements.in + # pip-tools + # setuptools-scm setuptools-scm==8.1.0 # via -r dev-requirements.in snowballstemmer==2.2.0 @@ -123,34 +131,16 @@ sphinxcontrib-qthelp==2.0.0 # via sphinx sphinxcontrib-serializinghtml==2.0.0 # via sphinx -tomli==2.0.2 - # via - # build - # coverage - # pip-tools - # pyproject-api - # pytest - # setuptools-scm - # sphinx - # tox tox==4.23.2 # via -r dev-requirements.in -typing-extensions==4.12.2 - # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt - # tox urllib3==2.2.3 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # requests virtualenv==20.27.1 # via tox wheel==0.44.0 # via - # -c /home/beapen/repos/nlp-qrmine/requirements.txt + # -c requirements.txt # -r dev-requirements.in # pip-tools - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools diff --git a/notes/pip-tools.md b/notes/pip-tools.md index 2bbbecb..da4baa4 100644 --- a/notes/pip-tools.md +++ b/notes/pip-tools.md @@ -20,4 +20,5 @@ OR ## uv * pip install uv -* uv pip compile setup.cfg -o requirements.txt --universal \ No newline at end of file +* uv pip compile setup.cfg -o requirements.txt --universal +* uv pip compile dev-requirements.in -o dev-requirements.txt --universal \ No newline at end of file diff --git a/tox.ini b/tox.ini index 18d1eb4..3eb707d 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ [tox] minversion = 2.4 -envlist = py37, integration +envlist = py311, integration [testenv] setenv = TOXINIDIR = {toxinidir} From 10994dd652f340213f1cc20beec40d5ecde05aa8 Mon Sep 17 00:00:00 2001 From: Bell Eapen Date: Sun, 10 Nov 2024 13:33:58 -0600 Subject: [PATCH 4/6] chore(deps): upgrade Python version and update package dependencies --- .github/workflows/docs.yml | 2 +- .github/workflows/publish.yml | 2 +- dev-requirements.txt | 4 +- requirements.txt | 101 +++++++++++++++++----------------- setup.cfg | 3 +- 5 files changed, 54 insertions(+), 58 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 5a5f30a..7b25612 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7da783a..9018711 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5.1.1 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/dev-requirements.txt b/dev-requirements.txt index 5c0eb4c..f36f95c 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -62,7 +62,7 @@ markupsafe==3.0.2 # via # -c requirements.txt # jinja2 -packaging==24.1 +packaging==24.2 # via # -c requirements.txt # build @@ -139,7 +139,7 @@ urllib3==2.2.3 # requests virtualenv==20.27.1 # via tox -wheel==0.44.0 +wheel==0.45.0 # via # -c requirements.txt # -r dev-requirements.in diff --git a/requirements.txt b/requirements.txt index 1fe7438..95e8c90 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,16 +2,17 @@ # uv pip compile setup.cfg -o requirements.txt --universal absl-py==2.1.0 # via + # keras # tensorboard # tensorflow +annotated-types==0.7.0 + # via pydantic astunparse==1.6.3 # via tensorflow -blis==0.7.11 +blis==1.0.1 # via thinc cachetools==5.5.0 - # via - # google-auth - # textacy + # via textacy catalogue==2.0.10 # via # spacy @@ -54,14 +55,8 @@ floret==0.10.5 # via textacy fonttools==4.54.1 # via matplotlib -gast==0.4.0 +gast==0.6.0 # via tensorflow -google-auth==2.35.0 - # via - # google-auth-oauthlib - # tensorboard -google-auth-oauthlib==1.0.0 - # via tensorboard google-pasta==0.2.0 # via tensorflow grpcio==1.67.1 @@ -69,7 +64,9 @@ grpcio==1.67.1 # tensorboard # tensorflow h5py==3.12.1 - # via tensorflow + # via + # keras + # tensorflow idna==3.10 # via requests imbalanced-learn==0.12.4 @@ -84,7 +81,7 @@ joblib==1.4.2 # mlxtend # scikit-learn # textacy -keras==2.13.1 +keras==3.6.0 # via tensorflow kiwisolver==1.4.7 # via matplotlib @@ -110,23 +107,31 @@ matplotlib==3.9.2 # mlxtend mdurl==0.1.2 # via markdown-it-py -mlxtend==0.23.1 +ml-dtypes==0.4.1 + # via + # keras + # tensorflow +mlxtend==0.23.2 # via qrmine (setup.cfg) murmurhash==1.0.10 # via # preshed # spacy # thinc +namex==0.0.8 + # via keras networkx==3.4.2 # via textacy -numpy==1.24.3 +numpy==2.0.2 # via # blis # contourpy # floret # h5py # imbalanced-learn + # keras # matplotlib + # ml-dtypes # mlxtend # pandas # scikit-learn @@ -139,18 +144,20 @@ numpy==1.24.3 # xgboost nvidia-nccl-cu12==2.23.4 ; platform_machine != 'aarch64' and platform_system == 'Linux' # via xgboost -oauthlib==3.2.2 - # via requests-oauthlib opt-einsum==3.4.0 # via tensorflow -packaging==24.1 +optree==0.13.0 + # via keras +packaging==24.2 # via + # keras # matplotlib # spacy + # tensorboard # tensorflow # thinc # weasel -pandas==2.1.0 +pandas==2.2.3 # via # qrmine (setup.cfg) # mlxtend @@ -160,22 +167,18 @@ preshed==3.0.9 # via # spacy # thinc -protobuf==4.25.5 +protobuf==5.28.3 # via # tensorboard # tensorflow -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -pydantic==1.10.18 +pydantic==2.9.2 # via # confection # spacy # thinc # weasel +pydantic-core==2.23.4 + # via pydantic pygments==2.18.0 # via rich pyparsing==3.2.0 @@ -190,18 +193,15 @@ pytz==2024.2 # via pandas requests==2.32.3 # via - # requests-oauthlib # spacy - # tensorboard + # tensorflow # textacy # vadersentiment # weasel -requests-oauthlib==2.0.0 - # via google-auth-oauthlib rich==13.9.4 - # via typer -rsa==4.9 - # via google-auth + # via + # keras + # typer scikit-learn==1.5.2 # via # qrmine (setup.cfg) @@ -229,10 +229,11 @@ six==1.16.0 # astunparse # google-pasta # python-dateutil + # tensorboard # tensorflow smart-open==7.0.5 # via weasel -spacy==3.7.5 +spacy==3.8.2 # via # qrmine (setup.cfg) # textacy @@ -246,23 +247,19 @@ srsly==2.4.8 # spacy # thinc # weasel -tensorboard==2.13.0 +tensorboard==2.18.0 # via tensorflow tensorboard-data-server==0.7.2 # via tensorboard -tensorflow==2.13.1 +tensorflow==2.18.0 # via qrmine (setup.cfg) -tensorflow-estimator==2.13.0 +tensorflow-io-gcs-filesystem==0.37.1 ; python_full_version < '3.12' # via tensorflow -tensorflow-io-gcs-filesystem==0.31.0 - # via - # qrmine (setup.cfg) - # tensorflow termcolor==2.5.0 # via tensorflow textacy==0.13.0 # via qrmine (setup.cfg) -thinc==8.2.5 +thinc==8.3.2 # via spacy threadpoolctl==3.5.0 # via @@ -270,17 +267,19 @@ threadpoolctl==3.5.0 # scikit-learn toolz==1.0.0 # via cytoolz -tqdm==4.66.6 +tqdm==4.67.0 # via # spacy # textacy -typer==0.12.5 +typer==0.13.0 # via # spacy # weasel -typing-extensions==4.5.0 +typing-extensions==4.12.2 # via + # optree # pydantic + # pydantic-core # tensorflow # typer tzdata==2024.2 @@ -296,13 +295,11 @@ wasabi==1.1.3 # weasel weasel==0.4.1 # via spacy -werkzeug==3.1.1 +werkzeug==3.1.3 # via tensorboard -wheel==0.44.0 - # via - # astunparse - # tensorboard -wrapt==1.14.1 +wheel==0.45.0 + # via astunparse +wrapt==1.16.0 # via # smart-open # tensorflow diff --git a/setup.cfg b/setup.cfg index f3c1207..2930c4b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -64,8 +64,7 @@ install_requires = mlxtend spacy textacy - tensorflow==2.13.1 - tensorflow-io-gcs-filesystem==0.31.0 + tensorflow [options.packages.find] where = src From 1dc2c4a7e2ed81ecc6360e07b1af98d2a78b2ba5 Mon Sep 17 00:00:00 2001 From: Bell Eapen Date: Sun, 10 Nov 2024 13:39:37 -0600 Subject: [PATCH 5/6] chore(deps): update TensorFlow and related dependencies in requirements.txt and setup.cfg --- requirements.txt | 62 +++++++++++++++++++++++++++--------------------- setup.cfg | 2 +- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/requirements.txt b/requirements.txt index 95e8c90..5d96ef2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,6 @@ # uv pip compile setup.cfg -o requirements.txt --universal absl-py==2.1.0 # via - # keras # tensorboard # tensorflow annotated-types==0.7.0 @@ -12,7 +11,9 @@ astunparse==1.6.3 blis==1.0.1 # via thinc cachetools==5.5.0 - # via textacy + # via + # google-auth + # textacy catalogue==2.0.10 # via # spacy @@ -57,6 +58,12 @@ fonttools==4.54.1 # via matplotlib gast==0.6.0 # via tensorflow +google-auth==2.36.0 + # via + # google-auth-oauthlib + # tensorboard +google-auth-oauthlib==1.0.0 + # via tensorboard google-pasta==0.2.0 # via tensorflow grpcio==1.67.1 @@ -64,9 +71,7 @@ grpcio==1.67.1 # tensorboard # tensorflow h5py==3.12.1 - # via - # keras - # tensorflow + # via tensorflow idna==3.10 # via requests imbalanced-learn==0.12.4 @@ -81,7 +86,7 @@ joblib==1.4.2 # mlxtend # scikit-learn # textacy -keras==3.6.0 +keras==2.14.0 # via tensorflow kiwisolver==1.4.7 # via matplotlib @@ -107,10 +112,8 @@ matplotlib==3.9.2 # mlxtend mdurl==0.1.2 # via markdown-it-py -ml-dtypes==0.4.1 - # via - # keras - # tensorflow +ml-dtypes==0.2.0 + # via tensorflow mlxtend==0.23.2 # via qrmine (setup.cfg) murmurhash==1.0.10 @@ -118,8 +121,6 @@ murmurhash==1.0.10 # preshed # spacy # thinc -namex==0.0.8 - # via keras networkx==3.4.2 # via textacy numpy==2.0.2 @@ -129,7 +130,6 @@ numpy==2.0.2 # floret # h5py # imbalanced-learn - # keras # matplotlib # ml-dtypes # mlxtend @@ -144,16 +144,14 @@ numpy==2.0.2 # xgboost nvidia-nccl-cu12==2.23.4 ; platform_machine != 'aarch64' and platform_system == 'Linux' # via xgboost +oauthlib==3.2.2 + # via requests-oauthlib opt-einsum==3.4.0 # via tensorflow -optree==0.13.0 - # via keras packaging==24.2 # via - # keras # matplotlib # spacy - # tensorboard # tensorflow # thinc # weasel @@ -167,10 +165,16 @@ preshed==3.0.9 # via # spacy # thinc -protobuf==5.28.3 +protobuf==4.25.5 # via # tensorboard # tensorflow +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 + # via google-auth pydantic==2.9.2 # via # confection @@ -193,15 +197,18 @@ pytz==2024.2 # via pandas requests==2.32.3 # via + # requests-oauthlib # spacy - # tensorflow + # tensorboard # textacy # vadersentiment # weasel +requests-oauthlib==2.0.0 + # via google-auth-oauthlib rich==13.9.4 - # via - # keras - # typer + # via typer +rsa==4.9 + # via google-auth scikit-learn==1.5.2 # via # qrmine (setup.cfg) @@ -247,13 +254,15 @@ srsly==2.4.8 # spacy # thinc # weasel -tensorboard==2.18.0 +tensorboard==2.14.1 # via tensorflow tensorboard-data-server==0.7.2 # via tensorboard -tensorflow==2.18.0 +tensorflow==2.14.0 # via qrmine (setup.cfg) -tensorflow-io-gcs-filesystem==0.37.1 ; python_full_version < '3.12' +tensorflow-estimator==2.14.0 + # via tensorflow +tensorflow-io-gcs-filesystem==0.37.1 # via tensorflow termcolor==2.5.0 # via tensorflow @@ -277,7 +286,6 @@ typer==0.13.0 # weasel typing-extensions==4.12.2 # via - # optree # pydantic # pydantic-core # tensorflow @@ -299,7 +307,7 @@ werkzeug==3.1.3 # via tensorboard wheel==0.45.0 # via astunparse -wrapt==1.16.0 +wrapt==1.14.1 # via # smart-open # tensorflow diff --git a/setup.cfg b/setup.cfg index 2930c4b..cf236a2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -64,7 +64,7 @@ install_requires = mlxtend spacy textacy - tensorflow + tensorflow<=2.16.2 [options.packages.find] where = src From b91237db3f49df3507e59e74c57ec6f4bf8b4a4b Mon Sep 17 00:00:00 2001 From: Bell Eapen Date: Sun, 10 Nov 2024 13:44:40 -0600 Subject: [PATCH 6/6] chore(deps): downgrade TensorFlow and related dependencies in requirements.txt and setup.cfg --- requirements.txt | 49 ++++++++++++++++++++++++------------------------ setup.cfg | 3 ++- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5d96ef2..de9e6f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,11 +4,9 @@ absl-py==2.1.0 # via # tensorboard # tensorflow -annotated-types==0.7.0 - # via pydantic astunparse==1.6.3 # via tensorflow -blis==1.0.1 +blis==0.7.11 # via thinc cachetools==5.5.0 # via @@ -56,7 +54,7 @@ floret==0.10.5 # via textacy fonttools==4.54.1 # via matplotlib -gast==0.6.0 +gast==0.4.0 # via tensorflow google-auth==2.36.0 # via @@ -86,7 +84,7 @@ joblib==1.4.2 # mlxtend # scikit-learn # textacy -keras==2.14.0 +keras==2.13.1 # via tensorflow kiwisolver==1.4.7 # via matplotlib @@ -112,8 +110,6 @@ matplotlib==3.9.2 # mlxtend mdurl==0.1.2 # via markdown-it-py -ml-dtypes==0.2.0 - # via tensorflow mlxtend==0.23.2 # via qrmine (setup.cfg) murmurhash==1.0.10 @@ -123,7 +119,7 @@ murmurhash==1.0.10 # thinc networkx==3.4.2 # via textacy -numpy==2.0.2 +numpy==1.24.3 # via # blis # contourpy @@ -131,7 +127,6 @@ numpy==2.0.2 # h5py # imbalanced-learn # matplotlib - # ml-dtypes # mlxtend # pandas # scikit-learn @@ -155,7 +150,11 @@ packaging==24.2 # tensorflow # thinc # weasel -pandas==2.2.3 +pandas==2.1.0 ; python_full_version >= '3.12' + # via + # qrmine (setup.cfg) + # mlxtend +pandas==2.2.3 ; python_full_version < '3.12' # via # qrmine (setup.cfg) # mlxtend @@ -175,14 +174,12 @@ pyasn1==0.6.1 # rsa pyasn1-modules==0.4.1 # via google-auth -pydantic==2.9.2 +pydantic==1.10.19 # via # confection # spacy # thinc # weasel -pydantic-core==2.23.4 - # via pydantic pygments==2.18.0 # via rich pyparsing==3.2.0 @@ -236,11 +233,10 @@ six==1.16.0 # astunparse # google-pasta # python-dateutil - # tensorboard # tensorflow smart-open==7.0.5 # via weasel -spacy==3.8.2 +spacy==3.7.5 # via # qrmine (setup.cfg) # textacy @@ -254,21 +250,23 @@ srsly==2.4.8 # spacy # thinc # weasel -tensorboard==2.14.1 +tensorboard==2.13.0 # via tensorflow tensorboard-data-server==0.7.2 # via tensorboard -tensorflow==2.14.0 +tensorflow==2.13.1 # via qrmine (setup.cfg) -tensorflow-estimator==2.14.0 - # via tensorflow -tensorflow-io-gcs-filesystem==0.37.1 +tensorflow-estimator==2.13.0 # via tensorflow +tensorflow-io-gcs-filesystem==0.31.0 + # via + # qrmine (setup.cfg) + # tensorflow termcolor==2.5.0 # via tensorflow textacy==0.13.0 # via qrmine (setup.cfg) -thinc==8.3.2 +thinc==8.2.5 # via spacy threadpoolctl==3.5.0 # via @@ -284,10 +282,9 @@ typer==0.13.0 # via # spacy # weasel -typing-extensions==4.12.2 +typing-extensions==4.5.0 # via # pydantic - # pydantic-core # tensorflow # typer tzdata==2024.2 @@ -306,8 +303,10 @@ weasel==0.4.1 werkzeug==3.1.3 # via tensorboard wheel==0.45.0 - # via astunparse -wrapt==1.14.1 + # via + # astunparse + # tensorboard +wrapt==1.16.0 # via # smart-open # tensorflow diff --git a/setup.cfg b/setup.cfg index cf236a2..e6953b9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -64,7 +64,8 @@ install_requires = mlxtend spacy textacy - tensorflow<=2.16.2 + tensorflow<=2.13.1 + tensorflow-io-gcs-filesystem<=0.31.0 [options.packages.find] where = src