diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index f742724..3693dc2 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -13,27 +13,27 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ["3.11"] os: [ubuntu-latest, macos-13, windows-latest] runs-on: ${{ matrix.os }} timeout-minutes: 20 steps: - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + - name: Install uv + uses: astral-sh/setup-uv@v5 with: - python-version: ${{ matrix.python-version }} - cache: 'pip' # caching pip dependencies + enable-cache: true + - name: "Set up Python" + uses: actions/setup-python@v5 + with: + python-version-file: "pyproject.toml" - name: run on mac if: startsWith(matrix.os, 'mac') run: | brew install libomp - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - python -m spacy download en_core_web_sm - - name: Test with pytest + - name: Install the project run: | - pip install pytest - pytest + uv sync --all-extras --dev + uv pip install pip + uv run python -m spacy download en_core_web_sm + - name: Run tests + run: uv run pytest tests diff --git a/.gitignore b/.gitignore index 64049e7..c29a2a9 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ __pycache__/* .idea .venv conda +uv.lock # Package files *.egg diff --git a/dev-requirements.in b/dev-requirements.in deleted file mode 100644 index 2b56355..0000000 --- a/dev-requirements.in +++ /dev/null @@ -1,11 +0,0 @@ -# dev-requirements.in --c requirements.txt -pytest-cov -pytest -recommonmark -sphinx>=3.2.1 -setuptools -setuptools_scm -wheel>=0.37.0 # conflicts with dependency of tensorflow -tox -pip-tools \ No newline at end of file diff --git a/dev-requirements.txt b/dev-requirements.txt deleted file mode 100644 index f36f95c..0000000 --- a/dev-requirements.txt +++ /dev/null @@ -1,146 +0,0 @@ -# This file was autogenerated by uv via the following command: -# uv pip compile dev-requirements.in -o dev-requirements.txt --universal -alabaster==1.0.0 - # via sphinx -babel==2.16.0 - # via sphinx -build==1.2.2.post1 - # via pip-tools -cachetools==5.5.0 - # via - # -c requirements.txt - # tox -certifi==2024.8.30 - # via - # -c requirements.txt - # requests -chardet==5.2.0 - # via tox -charset-normalizer==3.4.0 - # via - # -c requirements.txt - # requests -click==8.1.7 - # via - # -c requirements.txt - # pip-tools -colorama==0.4.6 - # via - # -c requirements.txt - # build - # click - # pytest - # sphinx - # tox -commonmark==0.9.1 - # via recommonmark -coverage==7.6.4 - # via pytest-cov -distlib==0.3.9 - # via virtualenv -docutils==0.21.2 - # via - # recommonmark - # sphinx -filelock==3.16.1 - # via - # tox - # virtualenv -idna==3.10 - # via - # -c requirements.txt - # requests -imagesize==1.4.1 - # via sphinx -iniconfig==2.0.0 - # via pytest -jinja2==3.1.4 - # via - # -c requirements.txt - # sphinx -markupsafe==3.0.2 - # via - # -c requirements.txt - # jinja2 -packaging==24.2 - # via - # -c requirements.txt - # build - # pyproject-api - # pytest - # setuptools-scm - # sphinx - # tox -pip==24.3.1 - # via pip-tools -pip-tools==7.4.1 - # via -r dev-requirements.in -platformdirs==4.3.6 - # via - # tox - # virtualenv -pluggy==1.5.0 - # via - # pytest - # tox -pygments==2.18.0 - # via - # -c requirements.txt - # sphinx -pyproject-api==1.8.0 - # via tox -pyproject-hooks==1.2.0 - # via - # build - # pip-tools -pytest==8.3.3 - # via - # -r dev-requirements.in - # pytest-cov -pytest-cov==6.0.0 - # via -r 
dev-requirements.in
-recommonmark==0.7.1
-    # via -r dev-requirements.in
-requests==2.32.3
-    # via
-    #   -c requirements.txt
-    #   sphinx
-setuptools==75.3.0
-    # via
-    #   -c requirements.txt
-    #   -r dev-requirements.in
-    #   pip-tools
-    #   setuptools-scm
-setuptools-scm==8.1.0
-    # via -r dev-requirements.in
-snowballstemmer==2.2.0
-    # via sphinx
-sphinx==8.1.3
-    # via
-    #   -r dev-requirements.in
-    #   recommonmark
-sphinxcontrib-applehelp==2.0.0
-    # via sphinx
-sphinxcontrib-devhelp==2.0.0
-    # via sphinx
-sphinxcontrib-htmlhelp==2.1.0
-    # via sphinx
-sphinxcontrib-jsmath==1.0.1
-    # via sphinx
-sphinxcontrib-qthelp==2.0.0
-    # via sphinx
-sphinxcontrib-serializinghtml==2.0.0
-    # via sphinx
-tox==4.23.2
-    # via -r dev-requirements.in
-urllib3==2.2.3
-    # via
-    #   -c requirements.txt
-    #   requests
-virtualenv==20.27.1
-    # via tox
-wheel==0.45.0
-    # via
-    #   -c requirements.txt
-    #   -r dev-requirements.in
-    #   pip-tools
diff --git a/notes/conda.md b/notes/conda.md
new file mode 100644
index 0000000..79eb6c8
--- /dev/null
+++ b/notes/conda.md
@@ -0,0 +1,12 @@
+conda create --name qrmine python=3.11
+conda activate qrmine
+
+conda install conda-forge::uv
+uv pip install ini2toml
+ini2toml setup.cfg -o pyproject.toml
+uv pip install -e .
+python -m spacy download en_core_web_sm
+
+
+
+pip3 install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
\ No newline at end of file
diff --git a/notes/new-process.md b/notes/new-process.md
new file mode 100644
index 0000000..1ead749
--- /dev/null
+++ b/notes/new-process.md
@@ -0,0 +1,33 @@
+conda install conda-forge::uv
+uv pip install ini2toml
+ini2toml setup.cfg -o pyproject.toml
+
+delete setup.cfg
+delete requirements.txt, dev-requirements.txt, dev-requirements.in
+remove deps from tox.ini
+
+uv pip install -e .
+see pr.yml for GitHub actions
+see pyproject.toml for pytorch cpu install
+uv pip install -e .
+
+uv sync --all-extras --dev
+uv pip install pip
+uv run python -m spacy download en_core_web_sm
+
+pyproject.toml
+requires = ["setuptools>=61.2", "wheel", "pip"]
+
+dev = [
+    "setuptools",
+    "setuptools_scm",
+    "pytest",
+    "pytest-cov",
+    "tox",
+    "black",
+    "recommonmark",
+    "sphinx",
+    "wheel",
+    "twine",
+]
+
diff --git a/notes/pip-tools.md b/notes/pip-tools.md
index da4baa4..c504a1e 100644
--- a/notes/pip-tools.md
+++ b/notes/pip-tools.md
@@ -21,4 +21,7 @@ OR
 
 * pip install uv
 * uv pip compile setup.cfg -o requirements.txt --universal
-* uv pip compile dev-requirements.in -o dev-requirements.txt --universal
\ No newline at end of file
+* uv pip compile dev-requirements.in -o dev-requirements.txt --universal
+
+uv pip install ini2toml
+ini2toml setup.cfg -o pyproject.toml
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 89a5bed..9fc3688 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,9 +1,186 @@
 [build-system]
-# AVOID CHANGING REQUIRES: IT WILL BE UPDATED BY PYSCAFFOLD!
-requires = ["setuptools>=46.1.0", "setuptools_scm[toml]>=5"]
+requires = ["setuptools>=61.2", "wheel", "pip"]
 build-backend = "setuptools.build_meta"
 
-[tool.setuptools_scm]
-# For smarter version schemes and other configuration options,
-# check out https://github.com/pypa/setuptools_scm
-version_scheme = "no-guess-dev"
+[project]
+name = "qrmine"
+description = "Qualitative Research support tools in Python!"
+authors = [{name = "beapen", email = "github@gulfdoctor.net"}]
+license = {text = "GPL-3.0-only"}
+# license_files = LICENSE.txt
+# long_description = file: README.rst
+# long_description_content_type = text/x-rst; charset=UTF-8
+classifiers = [
+    "Intended Audience :: Science/Research",
+    "Development Status :: 4 - Beta",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3.11",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+]
+requires-python = ">=3.11, <3.12"
+dependencies = [
+    'importlib-metadata; python_version<"3.8"',
+    "pandas",
+    "matplotlib",
+    "click",
+    "scikit-learn",
+    "imbalanced-learn",
+    "vaderSentiment",
+    "xgboost",
+    "mlxtend",
+    "spacy",
+    "textacy",
+    "torch==2.2.2",
+    "pypdf",
+    "requests",
+    "gensim",
+    "seaborn",
+    "wordcloud",
+]
+dynamic = ["version"]
+
+[project.readme]
+file = "README.md"
+content-type = "text/markdown"
+# Add here related links, for example:
+
+[project.urls]
+Homepage = "https://github.com/dermatologist/nlp-qrmine"
+Documentation = "https://arxiv.org/abs/2003.13519"
+# Source = https://github.com/pyscaffold/pyscaffold/
+# Changelog = https://pyscaffold.org/en/latest/changelog.html
+# Tracker = https://github.com/pyscaffold/pyscaffold/issues
+# Conda-Forge = https://anaconda.org/conda-forge/pyscaffold
+# Download = https://pypi.org/project/PyScaffold/#files
+# Twitter = https://twitter.com/PyScaffold
+# Change if running only on Windows, Mac or Linux (comma-separated)
+# Add here all kinds of additional classifiers as defined under
+# https://pypi.org/classifiers/
+
+[project.optional-dependencies]
+# Add here additional requirements for extra features, to install with:
+# `pip install qrmine[PDF]` like:
+# PDF = ReportLab; RXP
+# Add here test requirements (semicolon/line-separated)
+testing = [
+    "setuptools",
+    "pytest",
+    "pytest-cov",
+]
+
+dev = [
+    "setuptools",
+    "setuptools_scm",
+    "pytest",
+    "pytest-cov",
+    "tox",
+    "black",
+    "recommonmark",
+    "sphinx",
+    "wheel",
+    "twine",
+]
+
+[project.entry-points]
+# Add here console scripts like:
+# console_scripts =
+#     script_name = qrmine.module:function
+# For example:
+# console_scripts =
+#     fibonacci = qrmine.skeleton:run
+# And any other entry points, for example:
+# pyscaffold.cli =
+#     awesome = pyscaffoldext.awesome.extension:AwesomeExtension
+
+[project.scripts]
+qrmine = "qrmine.main:main_routine"
+
+[tool.setuptools]
+zip-safe = false
+include-package-data = true
+package-dir = {"" = "src"}
+# Require a min/specific Python version (comma-separated conditions)
+# python_requires = >=3.8
+# Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
+# Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
+# new major versions. This works if the required packages follow Semantic Versioning.
+# For more information, check out https://semver.org/.
+platforms = ["any"]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+exclude = ["tests"]
+namespaces = true
+
+[tool.pytest.ini_options]
+# Specify command line options as you would do when invoking pytest directly.
+# e.g. --cov-report html (or xml) for html/xml output or --junitxml junit.xml
+# in order to write a coverage file that can be read by Jenkins.
+# CAUTION: --cov flags may prohibit setting breakpoints while debugging.
+# Comment those flags to avoid this pytest issue.
+addopts = """ +--verbose""" +norecursedirs = [ + "dist", + "build", + ".tox", +] + +[[tool.uv.index]] +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" +explicit = true + +[tool.uv.sources] +torch = [ + { index = "pytorch-cpu" }, +] +torchvision = [ + { index = "pytorch-cpu" }, +] + +[tool.aliases] +release = "sdist bdist_wheel upload" + +[tool.distutils.bdist_wheel] +# Use this option if your package is pure-python +universal = 1 + +[tool.build_sphinx] +source_dir = "docs" +build_dir = "docs/_build" +testpaths = "tests" +# Use pytest markers to select/deselect specific tests +# markers = +# slow: mark tests as slow (deselect with '-m "not slow"') +# system: mark end-to-end system tests + +[tool.devpi.upload] +# Options for the devpi: PyPI server and packaging tool +# VCS export must be deactivated since we are using setuptools-scm +no_vcs = "1" +formats = "bdist_wheel" + +[tool.flake8] +# Some sane defaults for the code style checker flake8 +max_line_length = "88" +extend_ignore = "E203, W503" +# ^ Black-compatible +# E203 and W503 have edge cases handled by black +exclude = """ +.tox +build +dist +.eggs +docs/conf.py""" + +[tool.pyscaffold] +# PyScaffold's parameters when the project was created. +# This will be used when updating. Do not change! +version = "4.6" +package = "qrmine" +# This file is used to configure your project. +# Read more about the various options under: +# https://setuptools.pypa.io/en/latest/userguide/declarative_config.html +# https://setuptools.pypa.io/en/latest/references/keywords.html + diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 260d413..0000000 --- a/requirements.txt +++ /dev/null @@ -1,314 +0,0 @@ -# This file was autogenerated by uv via the following command: -# uv pip compile setup.cfg -o requirements.txt --universal -absl-py==2.1.0 - # via - # tensorboard - # tensorflow -astunparse==1.6.3 - # via tensorflow -blis==0.7.11 - # via thinc -cachetools==5.5.0 - # via - # google-auth - # textacy -catalogue==2.0.10 - # via - # spacy - # srsly - # textacy - # thinc -certifi==2024.8.30 - # via requests -charset-normalizer==3.4.0 - # via requests -click==8.1.7 - # via - # qrmine (setup.cfg) - # typer -cloudpathlib==0.20.0 - # via weasel -colorama==0.4.6 ; sys_platform == 'win32' or platform_system == 'Windows' - # via - # click - # tqdm - # wasabi -confection==0.1.5 - # via - # thinc - # weasel -contourpy==1.3.0 - # via matplotlib -cycler==0.12.1 - # via matplotlib -cymem==2.0.8 - # via - # preshed - # spacy - # thinc -cytoolz==1.0.0 - # via textacy -flatbuffers==24.3.25 - # via tensorflow -floret==0.10.5 - # via textacy -fonttools==4.54.1 - # via matplotlib -gast==0.4.0 - # via tensorflow -google-auth==2.36.0 - # via - # google-auth-oauthlib - # tensorboard -google-auth-oauthlib==1.0.0 - # via tensorboard -google-pasta==0.2.0 - # via tensorflow -grpcio==1.67.1 - # via - # tensorboard - # tensorflow -h5py==3.12.1 - # via tensorflow -idna==3.10 - # via requests -imbalanced-learn==0.12.4 - # via qrmine (setup.cfg) -jellyfish==1.1.0 - # via textacy -jinja2==3.1.6 - # via spacy -joblib==1.4.2 - # via - # imbalanced-learn - # mlxtend - # scikit-learn - # textacy -keras==2.13.1 - # via tensorflow -kiwisolver==1.4.7 - # via matplotlib -langcodes==3.4.1 - # via spacy -language-data==1.2.0 - # via langcodes -libclang==18.1.1 - # via tensorflow -marisa-trie==1.2.1 - # via language-data -markdown==3.7 - # via tensorboard -markdown-it-py==3.0.0 - # via rich -markupsafe==3.0.2 - # via - # jinja2 - # werkzeug 
-matplotlib==3.9.2 - # via - # qrmine (setup.cfg) - # mlxtend -mdurl==0.1.2 - # via markdown-it-py -mlxtend==0.23.2 - # via qrmine (setup.cfg) -murmurhash==1.0.10 - # via - # preshed - # spacy - # thinc -networkx==3.4.2 - # via textacy -numpy==1.24.3 - # via - # blis - # contourpy - # floret - # h5py - # imbalanced-learn - # matplotlib - # mlxtend - # pandas - # scikit-learn - # scipy - # spacy - # tensorboard - # tensorflow - # textacy - # thinc - # xgboost -nvidia-nccl-cu12==2.23.4 ; platform_machine != 'aarch64' and platform_system == 'Linux' - # via xgboost -oauthlib==3.2.2 - # via requests-oauthlib -opt-einsum==3.4.0 - # via tensorflow -packaging==24.2 - # via - # matplotlib - # spacy - # tensorflow - # thinc - # weasel -pandas==2.1.0 ; python_full_version >= '3.12' - # via - # qrmine (setup.cfg) - # mlxtend -pandas==2.2.3 ; python_full_version < '3.12' - # via - # qrmine (setup.cfg) - # mlxtend -pillow==11.0.0 - # via matplotlib -preshed==3.0.9 - # via - # spacy - # thinc -protobuf==4.25.5 - # via - # tensorboard - # tensorflow -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -pydantic==1.10.19 - # via - # confection - # spacy - # thinc - # weasel -pygments==2.18.0 - # via rich -pyparsing==3.2.0 - # via matplotlib -pyphen==0.17.0 - # via textacy -python-dateutil==2.9.0.post0 - # via - # matplotlib - # pandas -pytz==2024.2 - # via pandas -requests==2.32.3 - # via - # requests-oauthlib - # spacy - # tensorboard - # textacy - # vadersentiment - # weasel -requests-oauthlib==2.0.0 - # via google-auth-oauthlib -rich==13.9.4 - # via typer -rsa==4.9 - # via google-auth -scikit-learn==1.5.2 - # via - # qrmine (setup.cfg) - # imbalanced-learn - # mlxtend - # textacy -scipy==1.14.1 - # via - # imbalanced-learn - # mlxtend - # scikit-learn - # textacy - # xgboost -setuptools==75.3.0 - # via - # marisa-trie - # spacy - # tensorboard - # tensorflow - # thinc -shellingham==1.5.4 - # via typer -six==1.16.0 - # via - # astunparse - # google-pasta - # python-dateutil - # tensorflow -smart-open==7.0.5 - # via weasel -spacy==3.7.5 - # via - # qrmine (setup.cfg) - # textacy -spacy-legacy==3.0.12 - # via spacy -spacy-loggers==1.0.5 - # via spacy -srsly==2.4.8 - # via - # confection - # spacy - # thinc - # weasel -tensorboard==2.13.0 - # via tensorflow -tensorboard-data-server==0.7.2 - # via tensorboard -tensorflow==2.13.1 - # via qrmine (setup.cfg) -tensorflow-estimator==2.13.0 - # via tensorflow -tensorflow-io-gcs-filesystem==0.31.0 - # via - # qrmine (setup.cfg) - # tensorflow -termcolor==2.5.0 - # via tensorflow -textacy==0.13.0 - # via qrmine (setup.cfg) -thinc==8.2.5 - # via spacy -threadpoolctl==3.5.0 - # via - # imbalanced-learn - # scikit-learn -toolz==1.0.0 - # via cytoolz -tqdm==4.67.0 - # via - # spacy - # textacy -typer==0.13.0 - # via - # spacy - # weasel -typing-extensions==4.5.0 - # via - # pydantic - # tensorflow - # typer -tzdata==2024.2 - # via pandas -urllib3==2.2.3 - # via requests -vadersentiment==3.3.2 - # via qrmine (setup.cfg) -wasabi==1.1.3 - # via - # spacy - # thinc - # weasel -weasel==0.4.1 - # via spacy -werkzeug==3.1.3 - # via tensorboard -wheel==0.45.0 - # via - # astunparse - # tensorboard -wrapt==1.16.0 - # via - # smart-open - # tensorflow -xgboost==2.1.2 - # via qrmine (setup.cfg) diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index e6953b9..0000000 --- a/setup.cfg +++ /dev/null @@ -1,152 +0,0 @@ -# This file is used to configure your project. 
-# Read more about the various options under: -# https://setuptools.pypa.io/en/latest/userguide/declarative_config.html -# https://setuptools.pypa.io/en/latest/references/keywords.html - -[metadata] -name = qrmine -description = Qualitative Research support tools in Python! -author = beapen -author_email = github@gulfdoctor.net -license = GPL-3.0-only -# license_files = LICENSE.txt -# long_description = file: README.rst -# long_description_content_type = text/x-rst; charset=UTF-8 -long_description = file: README.md -long_description_content_type = text/markdown -url = https://github.com/dermatologist/nlp-qrmine -# Add here related links, for example: -project_urls = - Documentation = https://arxiv.org/abs/2003.13519 -# Source = https://github.com/pyscaffold/pyscaffold/ -# Changelog = https://pyscaffold.org/en/latest/changelog.html -# Tracker = https://github.com/pyscaffold/pyscaffold/issues -# Conda-Forge = https://anaconda.org/conda-forge/pyscaffold -# Download = https://pypi.org/project/PyScaffold/#files -# Twitter = https://twitter.com/PyScaffold - -# Change if running only on Windows, Mac or Linux (comma-separated) -platforms = any - -# Add here all kinds of additional classifiers as defined under -# https://pypi.org/classifiers/ -classifiers = - Intended Audience :: Science/Research - Development Status :: 4 - Beta - Operating System :: OS Independent - Programming Language :: Python :: 3.11 - Topic :: Scientific/Engineering :: Information Analysis - - -[options] -zip_safe = False -packages = find_namespace: -include_package_data = True -package_dir = - =src - -# Require a min/specific Python version (comma-separated conditions) -# python_requires = >=3.8 - -# Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0. -# Version specifiers like >=2.2,<3.0 avoid problems due to API changes in -# new major versions. This works if the required packages follow Semantic Versioning. -# For more information, check out https://semver.org/. -install_requires = - importlib-metadata; python_version<"3.8" - pandas - matplotlib - click - scikit-learn - imbalanced-learn - vaderSentiment - xgboost - mlxtend - spacy - textacy - tensorflow<=2.13.1 - tensorflow-io-gcs-filesystem<=0.31.0 - -[options.packages.find] -where = src -exclude = - tests - -[options.extras_require] -# Add here additional requirements for extra features, to install with: -# `pip install qrmine[PDF]` like: -# PDF = ReportLab; RXP - -# Add here test requirements (semicolon/line-separated) -testing = - setuptools - pytest - pytest-cov - -[options.entry_points] -# Add here console scripts like: -# console_scripts = -# script_name = qrmine.module:function -# For example: -# console_scripts = -# fibonacci = qrmine.skeleton:run -# And any other entry points, for example: -# pyscaffold.cli = -# awesome = pyscaffoldext.awesome.extension:AwesomeExtension -console_scripts = - qrmine = qrmine.main:main_routine - -[tool:pytest] -# Specify command line options as you would do when invoking pytest directly. -# e.g. --cov-report html (or xml) for html/xml output or --junitxml junit.xml -# in order to write a coverage file that can be read by Jenkins. -# CAUTION: --cov flags may prohibit setting breakpoints while debugging. -# Comment those flags to avoid this pytest issue. 
-addopts =
-    --verbose
-norecursedirs =
-    dist
-    build
-    .tox
-
-[aliases]
-release = sdist bdist_wheel upload
-
-[bdist_wheel]
-# Use this option if your package is pure-python
-universal = 1
-
-[build_sphinx]
-source_dir = docs
-build_dir = docs/_build
-
-testpaths = tests
-# Use pytest markers to select/deselect specific tests
-# markers =
-#     slow: mark tests as slow (deselect with '-m "not slow"')
-#     system: mark end-to-end system tests
-
-[devpi:upload]
-# Options for the devpi: PyPI server and packaging tool
-# VCS export must be deactivated since we are using setuptools-scm
-no_vcs = 1
-formats = bdist_wheel
-
-[flake8]
-# Some sane defaults for the code style checker flake8
-max_line_length = 88
-extend_ignore = E203, W503
-# ^ Black-compatible
-#   E203 and W503 have edge cases handled by black
-exclude =
-    .tox
-    build
-    dist
-    .eggs
-    docs/conf.py
-
-[pyscaffold]
-# PyScaffold's parameters when the project was created.
-# This will be used when updating. Do not change!
-version = 4.6
-package = qrmine
diff --git a/src/qrmine/__init__.py b/src/qrmine/__init__.py
index 09a4e35..3549721 100644
--- a/src/qrmine/__init__.py
+++ b/src/qrmine/__init__.py
@@ -6,6 +6,8 @@ from .readfiles import ReadData
 from .sentiment import Sentiment
 from .mlqrmine import MLQRMine
+from .cluster import ClusterDocs
+from .visualize import QRVisualize
 
 if sys.version_info[:2] >= (3, 8):
     # TODO: Import directly (no need for conditional) when `python_requires = >= 3.8`
diff --git a/src/qrmine/cluster.py b/src/qrmine/cluster.py
new file mode 100644
index 0000000..3e68ac3
--- /dev/null
+++ b/src/qrmine/cluster.py
@@ -0,0 +1,189 @@
+"""
+Copyright (C) 2025 Bell Eapen
+
+This file is part of qrmine.
+
+qrmine is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+qrmine is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with qrmine. If not, see <https://www.gnu.org/licenses/>.
+""" + +from pprint import pprint + +import pandas as pd +import spacy +from gensim import corpora +from gensim.models.ldamodel import LdaModel + + +class ClusterDocs: + + def __init__(self, documents=[], titles=[]): + self._nlp = spacy.load("en_core_web_sm") + self._documents = documents + self._titles = titles + self._num_topics = 5 + self._passes = 15 + self._dictionary = None + self._corpus = None + self._lda_model = None + # Apply preprocessing to each document + self._processed_docs = [self.preprocess(doc) for doc in documents] + self.process() + + @property + def documents(self): + return self._documents + + @property + def titles(self): + return self._titles + + @property + def num_topics(self): + return self._num_topics + + @property + def passes(self): + return self._passes + + @property + def processed_docs(self): + return self._processed_docs + + @documents.setter + def documents(self, documents): + self._documents = documents + self._processed_docs = [self.preprocess(doc) for doc in documents] + self.process() + + @titles.setter + def titles(self, titles): + self._titles = titles + + @num_topics.setter + def num_topics(self, num_topics): + self._num_topics = num_topics + + @passes.setter + def passes(self, passes): + self._passes = passes + + # Preprocess the documents using spaCy + def preprocess(self, doc): + # Tokenize and preprocess each document + doc = self._nlp(doc) + # Lemmatize and remove stop words + tokens = [token.lemma_ for token in doc if not token.is_stop] + return tokens + + def process(self): + # Create a dictionary representation of the documents + self._dictionary = corpora.Dictionary(self._processed_docs) + # Create a bag-of-words representation of the documents + self._corpus = [self._dictionary.doc2bow(doc) for doc in self._processed_docs] + # Build the LDA (Latent Dirichlet Allocation) model + + def build_lda_model(self): + if self._lda_model is None: + self._lda_model = LdaModel( + self._corpus, + num_topics=self._num_topics, + id2word=self._dictionary, + passes=self._passes, + ) + return self._lda_model.show_topics(formatted=False) + + def print_topics(self, num_words=5): + if self._lda_model is None: + self.build_lda_model() + # Print the topics and their corresponding words + pprint(self._lda_model.print_topics(num_words=num_words)) + + def print_clusters(self): + if self._lda_model is None: + self.build_lda_model() + # Perform semantic clustering + for i, doc in enumerate( + self._processed_docs + ): # Changed from get_processed_docs() to _documents + bow = self._dictionary.doc2bow(doc) + print( + f"Document {self._titles[i]} belongs to topic: {self._lda_model.get_document_topics(bow)}" + ) + + def format_topics_sentences(self): + self.build_lda_model() + # Init output + sent_topics_df = pd.DataFrame() + + # Get main topic in each document + for i, row_list in enumerate(self._lda_model[self._corpus]): + row = row_list[0] if self._lda_model.per_word_topics else row_list + # print(row) + row = sorted(row, key=lambda x: (x[1]), reverse=True) + # Get the Dominant topic, Perc Contribution and Keywords for each document + for j, (topic_num, prop_topic) in enumerate(row): + if j == 0: # => dominant topic + wp = self._lda_model.show_topic(topic_num) + topic_keywords = ", ".join([word for word, prop in wp]) + new_row = pd.DataFrame( + [[int(topic_num), round(prop_topic, 4), topic_keywords]], + columns=[ + "Dominant_Topic", + "Perc_Contribution", + "Topic_Keywords", + ], + ) + sent_topics_df = pd.concat( + [sent_topics_df, new_row], ignore_index=True + ) + 
else: + break + sent_topics_df.columns = [ + "Dominant_Topic", + "Perc_Contribution", + "Topic_Keywords", + ] + + # Add original text to the end of the output + contents = pd.Series(self._processed_docs) + sent_topics_df = pd.concat([sent_topics_df, contents], axis=1) + return sent_topics_df.reset_index(drop=False) + + # https://www.machinelearningplus.com/nlp/topic-modeling-visualization-how-to-present-results-lda-models/ + def most_representative_docs(self): + sent_topics_df = self.format_topics_sentences() + sent_topics_sorteddf_mallet = pd.DataFrame() + sent_topics_outdf_grpd = sent_topics_df.groupby("Dominant_Topic") + + for i, grp in sent_topics_outdf_grpd: + sent_topics_sorteddf_mallet = pd.concat( + [ + sent_topics_sorteddf_mallet, + grp.sort_values(["Perc_Contribution"], ascending=False).head(1), + ], + axis=0, + ) + + return sent_topics_sorteddf_mallet + + def topics_per_document(self, start=0, end=1): + corpus_sel = self._corpus[start:end] + dominant_topics = [] + topic_percentages = [] + for i, corp in enumerate(corpus_sel): + topic_percs = self._lda_model[corp] + dominant_topic = sorted(topic_percs, key=lambda x: x[1], reverse=True)[0][0] + dominant_topics.append((i, dominant_topic)) + topic_percentages.append(topic_percs) + return (dominant_topics, topic_percentages) diff --git a/src/qrmine/content.py b/src/qrmine/content.py index 3344a80..f9e6b0e 100644 --- a/src/qrmine/content.py +++ b/src/qrmine/content.py @@ -87,6 +87,10 @@ def idx(self, token): def doc(self): return self._processed + @property + def tokens(self): + return [token for token in self._processed if not token.is_stop and not token.is_punct and not token.is_space] + def process(self): for token in self._processed: if token.is_stop or token.is_digit or token.is_punct or token.is_space: diff --git a/src/qrmine/mlqrmine.py b/src/qrmine/mlqrmine.py index 12b75a3..fcfac7a 100644 --- a/src/qrmine/mlqrmine.py +++ b/src/qrmine/mlqrmine.py @@ -1,13 +1,10 @@ import numpy from imblearn.over_sampling import RandomOverSampler -from tensorflow.keras.models import Sequential -from tensorflow.keras.layers import Dense -from numpy import random, argsort, sqrt, array, ones from pandas import read_csv from sklearn.cluster import KMeans from sklearn.metrics import confusion_matrix from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder, OneHotEncoder +from sklearn.preprocessing import OneHotEncoder from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC from sklearn.neighbors import KDTree @@ -17,6 +14,25 @@ from mlxtend.frequent_patterns import apriori from mlxtend.frequent_patterns import association_rules +import torch.nn as nn +import torch.optim as optim +import torch +from torch.utils.data import DataLoader, TensorDataset +class NeuralNet(nn.Module): + def __init__(self, input_dim): + super(NeuralNet, self).__init__() + self.fc1 = nn.Linear(input_dim, 12) + self.fc2 = nn.Linear(12, 8) + self.fc3 = nn.Linear(8, 1) + self.relu = nn.ReLU() + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + x = self.relu(self.fc1(x)) + x = self.relu(self.fc2(x)) + x = self.sigmoid(self.fc3(x)) + return x + class MLQRMine(object): @@ -24,13 +40,13 @@ def __init__(self): self._seed = randint(1, 9) self._csvfile = "" self._titles = None + self._model = None self._dataset = None self._X = None self._y = None self._X_original = None self._y_original = None self._dataset_original = None - self._model = Sequential() self._sc = StandardScaler() self._vnum = 0 # Number of variables 
self._classifier = XGBClassifier()
@@ -147,22 +163,58 @@ def prepare_data(self, oversample=False):
             self.oversample()
 
     def get_nnet_predictions(self):
-        self._model.add(Dense(12, input_dim=self._vnum, kernel_initializer='uniform', activation='relu'))
-        self._model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
-        self._model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
-        # Compile model
-        self._model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
-        # Fit the model
-        self._model.fit(self._X, self._y, epochs=self._epochs, batch_size=10, verbose=2)
-
-        # calculate predictions
-        predictions = self._model.predict(self._X_original)
-        # round predictions
-        rounded = [round(x[0]) for x in predictions]
+
+        self._model = NeuralNet(self._vnum)
+        criterion = nn.BCELoss()
+        optimizer = optim.Adam(self._model.parameters(), lr=0.001)
+
+        # Convert data to PyTorch tensors
+        X_tensor = torch.tensor(self._X, dtype=torch.float32)
+        y_tensor = torch.tensor(self._y, dtype=torch.float32).view(-1, 1)
+
+        # Create a dataset and data loader
+        dataset = TensorDataset(X_tensor, y_tensor)
+        dataloader = DataLoader(dataset, batch_size=10, shuffle=True)
+
+        # Train the model
+        for epoch in range(self._epochs):
+            for batch_X, batch_y in dataloader:
+                optimizer.zero_grad()
+                outputs = self._model(batch_X)
+                loss = criterion(outputs, batch_y)
+                loss.backward()
+                optimizer.step()
+
+        # Calculate predictions
+        with torch.no_grad():
+            predictions = self._model(torch.tensor(self._X_original, dtype=torch.float32))
+            rounded = [round(x.item()) for x in predictions]
+        # print("Predictions: ", rounded)
+        # Calculate accuracy
+        correct = sum([1 for i in range(len(rounded)) if rounded[i] == self._y_original[i]])
+        total = len(rounded)
+        accuracy = correct / total
+        print(f'Accuracy: {accuracy * 100:.2f}%')
         return rounded
 
     def get_nnet_scores(self):
-        return self._model.evaluate(self._X, self._y)
+        # evaluate the PyTorch model trained in get_nnet_predictions
+        self._model.eval()
+        X_tensor = torch.tensor(self._X, dtype=torch.float32)
+        y_tensor = torch.tensor(self._y, dtype=torch.float32).view(-1, 1)
+        dataset = TensorDataset(X_tensor, y_tensor)
+        dataloader = DataLoader(dataset, batch_size=10, shuffle=True)
+        correct = 0
+        total = 0
+        with torch.no_grad():
+            for batch_X, batch_y in dataloader:
+                outputs = self._model(batch_X)
+                predicted = (outputs > 0.5).float()
+                total += batch_y.size(0)
+                correct += (predicted == batch_y).sum().item()
+        accuracy = correct / total
+        print(f'Accuracy: {accuracy * 100:.2f}%')
+        return accuracy
 
     def svm_confusion_matrix(self):
         """Generate confusion matrix for SVM
@@ -211,7 +263,6 @@ def get_centroids(self, c=1):
             print("Mean")
             print(self._dataset.iloc[cluster_list, :].mean(axis=0))
 
-
 """
 TODO: This is not working yet.
use the ColumnTransformer instead of categorical_features
diff --git a/src/qrmine/readfiles.py b/src/qrmine/readfiles.py
index a460795..a213ff7 100644
--- a/src/qrmine/readfiles.py
+++ b/src/qrmine/readfiles.py
@@ -1,5 +1,7 @@
 import re
-
+import os
+import requests
+from pypdf import PdfReader
 class ReadData(object):
 
     def __init__(self):
@@ -37,22 +39,10 @@ def append(self, title, document):
         self._documents.append(document)
         self._content += document
 
-    def read_file(self, file_names):
-        if len(file_names) > 1:
-            for file_name in file_names:
-                with open(file_name, 'r') as f:
-                    read_from_file = f.read()
-                    self._content = re.sub('<[^<]+?>', '', read_from_file)
-                    self._documents = re.split('<break>.*?</break>', read_from_file)
-                    # Delete the last blank record
-                    del self._documents[-1]
-                    pattern = r"<break>(.*?)</break>"
-                    _title = re.findall(pattern, read_from_file, flags=re.DOTALL)[0]
-                    self._titles.append(_title)
-                f.close()
-        else:
-            file_name = file_names[0]
-            with open(file_name, 'r') as f:
+    def read_file(self, input):
+        # if input is a file name
+        if isinstance(input, str) and os.path.isfile(input):
+            with open(input, 'r') as f:
                 read_from_file = f.read()
                 self._content = re.sub('<[^<]+?>', '', read_from_file)
                 self._documents = re.split('<break>.*?</break>', read_from_file)
@@ -60,25 +50,50 @@
                 del self._documents[-1]
                 pattern = r"<break>(.*?)</break>"
                 self._titles = re.findall(pattern, read_from_file, flags=re.DOTALL)
+        # if input is a folder name
+        elif isinstance(input, str) and os.path.isdir(input):
+            for file_name in os.listdir(input):
+                if file_name.endswith('.txt'):
+                    with open(os.path.join(input, file_name), 'r') as f:
+                        read_from_file = f.read()
+                        self._content += read_from_file
+                        self._documents.append(read_from_file)
+                        self.titles.append(file_name)
+                if file_name.endswith('.pdf'):
+                    with open(os.path.join(input, file_name), 'rb') as f:
+                        reader = PdfReader(f)
+                        read_from_file = ""
+                        for page in reader.pages:
+                            read_from_file += page.extract_text()
+                        self._content += read_from_file
+                        self._documents.append(read_from_file)
+                        self.titles.append(file_name)
+        # if input is a url
+        elif isinstance(input, str) and input.startswith(("http://", "https://")):
+            response = requests.get(input)
+            if response.status_code == 200:
+                read_from_file = response.text
+                self._content = read_from_file
+                self._documents.append(read_from_file)
+                self.titles.append(input)
+        else:
+            raise ValueError("Input must be a file name, folder name or url.")
 
-        """
-        Combine duplicate topics using Dict
-        Currently supported only for single file.
- """ - - doc_dict = {} - ct3 = 0 - for t in self._titles: - doc = doc_dict.get(t) - if doc: - doc_dict[t] = doc + self._documents[ct3] - else: - doc_dict[t] = self._documents[ct3] - ct3 += 1 - self._titles.clear() - self._documents.clear() - for t in doc_dict.keys(): - self._documents.append(doc_dict.get(t)) - self._titles.append(t) + """ + Combine duplicate topics using Dict + """ - f.close() + doc_dict = {} + ct3 = 0 + for t in self._titles: + doc = doc_dict.get(t) + if doc: + doc_dict[t] = doc + self._documents[ct3] + else: + doc_dict[t] = self._documents[ct3] + ct3 += 1 + self._titles.clear() + self._documents.clear() + for t in doc_dict.keys(): + self._documents.append(doc_dict.get(t)) + self._titles.append(t) diff --git a/src/qrmine/resources/df_dominant_topic.csv b/src/qrmine/resources/df_dominant_topic.csv new file mode 100644 index 0000000..115eb63 --- /dev/null +++ b/src/qrmine/resources/df_dominant_topic.csv @@ -0,0 +1,12 @@ +,Document_No,Dominant_Topic,Topic_Perc_Contrib,Keywords,Text +0,0,4,0.9903,"., GT, Strauss, ,, coding, +, ), Theory, seminal, (","['ground', 'theory', '(', 'GT', ')', 'emerge', 'research', 'methodology', 'medical', 'sociology', 'follow', 'seminal', 'work', 'Barney', 'Glaser', 'Anselm', 'Strauss', '.', ',', 'later', 'develop', 'different', 'view', 'original', 'contribution', 'supporter', 'lead', 'establishment', 'classical', 'Glaserian', 'GT', 'pragmatic', 'straussian', 'Grounded', 'Theory', '.', '\n\n', 'Strauss', 'Corbin', '(', '2', ')', 'recommend', 'strict', 'code', 'structure', 'elaborate', 'code', 'structure', 'datum', '.', 'seminal', 'article', 'Strauss', 'Corbin', 'describe', 'stage', 'coding', ':', 'open', 'coding', ',', 'axial', 'coding', ',', 'selective', 'coding', '.', 'classical', 'Grounded', 'Theory', 'offer', 'flexibility', 'Straussian', 'GT', 'easy', 'conduct', 'especially', 'new', 'researcher', '.', '\n']" +1,1,1,0.7811,",, theory, ., GT, evaluation, structure, coding, +, ), (","['\n', 'constant', 'comparison', 'central', 'classical', 'Grounded', 'Theory', ',', 'involve', 'incident', 'incident', 'comparison', 'identify', 'category', ',', 'incident', 'category', 'comparison', 'refine', 'category', 'category', 'category', 'comparison', 'emergence', 'theory', '.', '\n\n', 'Glaser', 'Classical', 'GT', '(', '1', ')', 'provide', 'guideline', 'evaluation', 'GT', 'methodology', '.', 'evaluation', 'base', 'theory', 'fit', 'datum', ',', 'theory', 'understandable', 'non', '-', 'professional', ',', 'theory', 'generalizable', 'situation', ',', 'theory', 'offer', 'control', 'structure', 'process', '.', '\n\n', 'Strauss', 'Corbin', '(', '2', ')', 'recommend', 'strict', 'code', 'structure', 'elaborate', 'code', 'structure', 'datum', '.', 'seminal', 'article', 'Strauss', 'Corbin', 'describe', 'stage', 'coding', ':', 'open', 'coding', ',', 'axial', 'coding', ',', 'selective', 'coding', '.', 'classical', 'Grounded', 'Theory', 'offer', 'flexibility', 'Straussian', 'GT', 'easy', 'conduct', 'especially', 'new', 'researcher', '.', '\n']" +2,2,1,0.9783,",, theory, ., GT, evaluation, structure, coding, +, ), (","['\n', 'Glaser', 'Classical', 'GT', '(', '1', ')', 'provide', 'guideline', 'evaluation', 'GT', 'methodology', '.', 'evaluation', 'base', 'theory', 'fit', 'datum', ',', 'theory', 'understandable', 'non', '-', 'professional', ',', 'theory', 'generalizable', 'situation', ',', 'theory', 'offer', 'control', 'structure', 'process', '.', '\n']" +3,3,3,0.9952,"., ,, coding, category, open, QRMine, datum, researcher, code, GT","['\n', 'open', 'coding', 'step', 
'datum', 'break', 'analytically', ',', 'conceptually', 'similar', 'chunk', 'group', 'category', 'subcategorie', '.', 'difference', 'category', 'establish', ',', 'property', 'dimension', 'dissect', '.', 'code', 'GT', 'overwhelming', ',', 'scale', 'category', 'open', 'coding', 'difficult', '.', 'lead', 'generation', 'low', '-', 'level', 'theory', '.', 'natural', 'language', 'processing', ',', 'information', 'system', 'help', 'young', 'researcher', 'sense', 'datum', 'collect', 'stage', 'open', 'coding', '.', 'QRMine', 'software', 'suite', 'support', 'qualitative', 'researcher', 'NLP', '.', ' ', 'QRMine', 'opensource', 'available', '.', 'idea', ',', 'comment', 'pull', 'request', 'welcome', '.', 'jupyter', 'notebook', 'show', 'feature', 'QRMine', '.', '\n\n', 'open', 'coding', 'step', 'datum', 'break', 'analytically', ',', 'conceptually', 'similar', 'chunk', 'group', 'category', 'subcategorie', '.', 'difference', 'category', 'establish', ',', 'property', 'dimension', 'dissect', '.', 'code', 'GT', 'overwhelming', ',', 'scale', 'category', 'open', 'coding', 'difficult', '.', 'lead', 'generation', 'low', '-', 'level', 'theory', '.', 'natural', 'language', 'processing', ',', 'information', 'system', 'help', 'young', 'researcher', 'sense', 'datum', 'collect', 'stage', 'open', 'coding', '.', 'QRMine', 'software', 'suite', 'support', 'qualitative', 'researcher', 'NLP', '.', ' ', 'QRMine', 'opensource', 'available', '.', 'idea', ',', 'comment', 'pull', 'request', 'welcome', '.', 'jupyter', 'notebook', 'show', 'feature', 'QRMine', '.', '\n']"
+4,4,4,0.9793,"., GT, Strauss, ,, coding, 
+, ), Theory, seminal, (","['\n', 'ground', 'theory', '(', 'GT', ')', 'emerge', 'research', 'methodology', 'medical', 'sociology', 'follow', 'seminal', 'work', 'Barney', 'Glaser', 'Anselm', 'Strauss', '.', ',', 'later', 'develop', 'different', 'view', 'original', 'contribution', 'supporter', 'lead', 'establishment', 'classical', 'Glaserian', 'GT', 'pragmatic', 'straussian', 'Grounded', 'Theory', '.', '\n']"
+5,5,2,0.9712,"category, comparison, incident, ,, 
+, involve, refine, identify, emergence, constant","['\n', 'constant', 'comparison', 'central', 'classical', 'Grounded', 'Theory', ',', 'involve', 'incident', 'incident', 'comparison', 'identify', 'category', ',', 'incident', 'category', 'comparison', 'refine', 'category', 'category', 'category', 'comparison', 'emergence', 'theory', '.', '\n']"
diff --git a/src/qrmine/visualize.py b/src/qrmine/visualize.py
new file mode 100644
index 0000000..4a7fc25
--- /dev/null
+++ b/src/qrmine/visualize.py
@@ -0,0 +1,390 @@
+"""
+Copyright (C) 2025 Bell Eapen
+
+This file is part of qrmine.
+
+qrmine is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+qrmine is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with qrmine. If not, see <https://www.gnu.org/licenses/>.
+""" + +from collections import Counter + +import matplotlib.colors as mcolors +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from matplotlib.patches import Rectangle +from matplotlib.ticker import FuncFormatter +from sklearn.manifold import TSNE +from wordcloud import STOPWORDS, WordCloud + + +class QRVisualize: + def __init__(self, data: pd.DataFrame = None): + """ + Initialize the QRVisualize class with a DataFrame. + + Parameters: + data (pd.DataFrame): The DataFrame containing the data to visualize. + """ + self.data = data + + def plot_frequency_distribution_of_words(self, df=None, folder_path=None): + if df is None: + df = self.data + doc_lens = [len(d) for d in df.Text] + + # Plot + plt.figure(figsize=(16, 7), dpi=160) + plt.hist(doc_lens, bins=1000, color="navy") + plt.text(750, 100, "Mean : " + str(round(np.mean(doc_lens)))) + plt.text(750, 90, "Median : " + str(round(np.median(doc_lens)))) + plt.text(750, 80, "Stdev : " + str(round(np.std(doc_lens)))) + plt.text(750, 70, "1%ile : " + str(round(np.quantile(doc_lens, q=0.01)))) + plt.text(750, 60, "99%ile : " + str(round(np.quantile(doc_lens, q=0.99)))) + + plt.gca().set( + xlim=(0, 1000), ylabel="Number of Documents", xlabel="Document Word Count" + ) + plt.tick_params(size=16) + plt.xticks(np.linspace(0, 1000, 9)) + plt.title("Distribution of Document Word Counts", fontdict=dict(size=22)) + plt.show() + # save + if folder_path: + plt.savefig(folder_path) + plt.close() + + def plot_distribution_by_topic(self, df=None, folder_path=None): + if df is None: + df = self.data + # Plot + cols = [ + color for name, color in mcolors.TABLEAU_COLORS.items() + ] # more colors: 'mcolors.XKCD_COLORS' + + fig, axes = plt.subplots( + 2, 2, figsize=(16, 14), dpi=160, sharex=True, sharey=True + ) + + for i, ax in enumerate(axes.flatten()): + df_dominant_topic_sub = df.loc[df.Dominant_Topic == i, :] + doc_lens = [len(d) for d in df_dominant_topic_sub.Text] + ax.hist(doc_lens, bins=1000, color=cols[i]) + ax.tick_params(axis="y", labelcolor=cols[i], color=cols[i]) + sns.kdeplot(doc_lens, color="black", shade=False, ax=ax.twinx()) + ax.set(xlim=(0, 1000), xlabel="Document Word Count") + ax.set_ylabel("Number of Documents", color=cols[i]) + ax.set_title("Topic: " + str(i), fontdict=dict(size=16, color=cols[i])) + + fig.tight_layout() + fig.subplots_adjust(top=0.90) + plt.xticks(np.linspace(0, 1000, 9)) + fig.suptitle( + "Distribution of Document Word Counts by Dominant Topic", fontsize=22 + ) + plt.show() + # save + if folder_path: + plt.savefig(folder_path) + plt.close() + + def plot_wordcloud(self, topics=None, folder_path=None): + cols = [ + color for name, color in mcolors.TABLEAU_COLORS.items() + ] # more colors: 'mcolors.XKCD_COLORS' + + cloud = WordCloud( + stopwords=STOPWORDS, + background_color="white", + width=250, + height=180, + max_words=5, + colormap="tab10", + color_func=lambda *args, **kwargs: cols[i], + prefer_horizontal=1.0, + ) + + fig, axes = plt.subplots(2, 2, figsize=(10, 10), sharex=True, sharey=True) + + for i, ax in enumerate(axes.flatten()): + fig.add_subplot(ax) + topic_words = dict(topics[i][1]) + cloud.generate_from_frequencies(topic_words, max_font_size=300) + plt.gca().imshow(cloud) + plt.gca().set_title("Topic " + str(i), fontdict=dict(size=16)) + plt.gca().axis("off") + + plt.subplots_adjust(wspace=0, hspace=0) + plt.axis("off") + plt.margins(x=0, y=0) + plt.tight_layout() + plt.show() + # save + if folder_path: + plt.savefig(folder_path) + plt.close() + + def 
plot_importance(self, topics=None, processed_docs=None, folder_path=None): + data_flat = [w for w_list in processed_docs for w in w_list] + counter = Counter(data_flat) + + out = [] + for i, topic in topics: + for word, weight in topic: + out.append([word, i, weight, counter[word]]) + + df = pd.DataFrame(out, columns=["word", "topic_id", "importance", "word_count"]) + + # Plot Word Count and Weights of Topic Keywords + fig, axes = plt.subplots(2, 2, figsize=(16, 10), sharey=True, dpi=160) + cols = [color for name, color in mcolors.TABLEAU_COLORS.items()] + for i, ax in enumerate(axes.flatten()): + ax.bar( + x="word", + height="word_count", + data=df.loc[df.topic_id == i, :], + color=cols[i], + width=0.5, + alpha=0.3, + label="Word Count", + ) + ax_twin = ax.twinx() + ax_twin.bar( + x="word", + height="importance", + data=df.loc[df.topic_id == i, :], + color=cols[i], + width=0.2, + label="Weights", + ) + ax.set_ylabel("Word Count", color=cols[i]) + ax_twin.set_ylim(0, 0.030) + ax.set_ylim(0, 3500) + ax.set_title("Topic: " + str(i), color=cols[i], fontsize=16) + ax.tick_params(axis="y", left=False) + ax.set_xticklabels( + df.loc[df.topic_id == i, "word"], + rotation=30, + horizontalalignment="right", + ) + ax.legend(loc="upper left") + ax_twin.legend(loc="upper right") + + fig.tight_layout(w_pad=2) + fig.suptitle("Word Count and Importance of Topic Keywords", fontsize=22, y=1.05) + plt.show() + # save + if folder_path: + plt.savefig(folder_path) + plt.close() + + def sentence_chart(self, lda_model=None, corpus=None, start=0, end=13): + corp = corpus[start:end] + mycolors = [color for name, color in mcolors.TABLEAU_COLORS.items()] + + fig, axes = plt.subplots( + end - start, 1, figsize=(20, (end - start) * 0.95), dpi=160 + ) + axes[0].axis("off") + for i, ax in enumerate(axes): + if i > 0: + corp_cur = corp[i - 1] + topic_percs, wordid_topics, wordid_phivalues = lda_model[corp_cur] + word_dominanttopic = [ + (lda_model.id2word[wd], topic[0]) for wd, topic in wordid_topics + ] + ax.text( + 0.01, + 0.5, + "Doc " + str(i - 1) + ": ", + verticalalignment="center", + fontsize=16, + color="black", + transform=ax.transAxes, + fontweight=700, + ) + + # Draw Rectange + topic_percs_sorted = sorted( + topic_percs, key=lambda x: (x[1]), reverse=True + ) + ax.add_patch( + Rectangle( + (0.0, 0.05), + 0.99, + 0.90, + fill=None, + alpha=1, + color=mycolors[topic_percs_sorted[0][0]], + linewidth=2, + ) + ) + + word_pos = 0.06 + for j, (word, topics) in enumerate(word_dominanttopic): + if j < 14: + ax.text( + word_pos, + 0.5, + word, + horizontalalignment="left", + verticalalignment="center", + fontsize=16, + color=mycolors[topics], + transform=ax.transAxes, + fontweight=700, + ) + word_pos += 0.009 * len( + word + ) # to move the word for the next iter + ax.axis("off") + ax.text( + word_pos, + 0.5, + ". . 
.", + horizontalalignment="left", + verticalalignment="center", + fontsize=16, + color="black", + transform=ax.transAxes, + ) + + plt.subplots_adjust(wspace=0, hspace=0) + plt.suptitle( + "Sentence Topic Coloring for Documents: " + + str(start) + + " to " + + str(end - 2), + fontsize=22, + y=0.95, + fontweight=700, + ) + plt.tight_layout() + plt.show() + + def cluster_chart(self, lda_model=None, corpus=None, n_topics=4, folder_path=None): + # Get topic weights + topic_weights = [] + for i, row_list in enumerate(lda_model[corpus]): + topic_weights.append([w for i, w in row_list[0]]) + + # Array of topic weights + arr = pd.DataFrame(topic_weights).fillna(0).values + + # Keep the well separated points (optional) + arr = arr[np.amax(arr, axis=1) > 0.35] + + # Dominant topic number in each doc + topic_num = np.argmax(arr, axis=1) + + # tSNE Dimension Reduction + tsne_model = TSNE( + n_components=2, verbose=1, random_state=0, angle=0.99, init="pca" + ) + tsne_lda = tsne_model.fit_transform(arr) + + # Plot + plt.figure(figsize=(16, 10), dpi=160) + for i in range(n_topics): + plt.scatter( + tsne_lda[topic_num == i, 0], + tsne_lda[topic_num == i, 1], + label=str(i), + alpha=0.5, + ) + plt.title("t-SNE Clustering of Topics", fontsize=22) + plt.xlabel("t-SNE Dimension 1", fontsize=16) + plt.ylabel("t-SNE Dimension 2", fontsize=16) + plt.legend(title="Topic Number", loc="upper right") + plt.show() + # save + if folder_path: + plt.savefig(folder_path) + plt.close() + + def most_discussed_topics( + self, lda_model, dominant_topics, topic_percentages, folder_path=None + ): + + # Distribution of Dominant Topics in Each Document + df = pd.DataFrame(dominant_topics, columns=["Document_Id", "Dominant_Topic"]) + dominant_topic_in_each_doc = df.groupby("Dominant_Topic").size() + df_dominant_topic_in_each_doc = dominant_topic_in_each_doc.to_frame( + name="count" + ).reset_index() + + # Total Topic Distribution by actual weight + topic_weightage_by_doc = pd.DataFrame([dict(t) for t in topic_percentages]) + df_topic_weightage_by_doc = ( + topic_weightage_by_doc.sum().to_frame(name="count").reset_index() + ) + + # Top 3 Keywords for each Topic + topic_top3words = [ + (i, topic) + for i, topics in lda_model.show_topics(formatted=False) + for j, (topic, wt) in enumerate(topics) + if j < 3 + ] + + df_top3words_stacked = pd.DataFrame( + topic_top3words, columns=["topic_id", "words"] + ) + df_top3words = df_top3words_stacked.groupby("topic_id").agg(", \n".join) + df_top3words.reset_index(level=0, inplace=True) + + # Plot + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4), dpi=120, sharey=True) + + # Topic Distribution by Dominant Topics + ax1.bar( + x="Dominant_Topic", + height="count", + data=df_dominant_topic_in_each_doc, + width=0.5, + color="firebrick", + ) + ax1.set_xticks( + range(df_dominant_topic_in_each_doc.Dominant_Topic.unique().__len__()) + ) + tick_formatter = FuncFormatter( + lambda x, pos: "Topic " + + str(x) + + "\n" + + df_top3words.loc[df_top3words.topic_id == x, "words"].values[0] + ) + ax1.xaxis.set_major_formatter(tick_formatter) + ax1.set_title("Number of Documents by Dominant Topic", fontdict=dict(size=10)) + ax1.set_ylabel("Number of Documents") + ax1.set_ylim(0, 1000) + + # Topic Distribution by Topic Weights + ax2.bar( + x="index", + height="count", + data=df_topic_weightage_by_doc, + width=0.5, + color="steelblue", + ) + ax2.set_xticks(range(df_topic_weightage_by_doc.index.unique().__len__())) + ax2.xaxis.set_major_formatter(tick_formatter) + ax2.set_title("Number of Documents by Topic 
Weightage", fontdict=dict(size=10)) + + plt.show() + + # save + if folder_path: + plt.savefig(folder_path) + plt.close() diff --git a/test.py b/test.py new file mode 100644 index 0000000..a5c4b31 --- /dev/null +++ b/test.py @@ -0,0 +1,33 @@ +import spacy + +# Load spaCy model +nlp = spacy.load("en_core_web_sm") + +# Sample documents +documents = [ + "Natural language processing is a field of AI.", + "Topic modeling helps in uncovering the main themes in a collection of documents.", + "Semantic clustering groups similar documents together based on meaning.", + "SpaCy is a popular NLP library.", + "Gensim is commonly used for topic modeling.", +] + + +# Preprocess the documents using spaCy +def preprocess(doc): + # Tokenize and preprocess each document + doc = nlp(doc) + print(f"Original Document: {doc}") + # Lemmatize and remove stop words + tokens = [token.lemma_ for token in doc if not token.is_stop] + print(f"Processed Tokens: {tokens}") + return tokens + + +# Apply preprocessing to each document +processed_docs = [preprocess(doc) for doc in documents] + + +# Print the processed documents +for i, doc in enumerate(processed_docs): + print(f"Document {i + 1}: {doc}") \ No newline at end of file diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 4ad331d..6c922a5 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -1,32 +1,45 @@ import pytest - @pytest.fixture def corpus_fixture(): from pkg_resources import resource_filename from src.qrmine import ReadData + corpus = ReadData() - file_path = resource_filename('src.qrmine.resources', 'interview.txt') - corpus.read_file([file_path]) + file_path = resource_filename("src.qrmine.resources", "interview.txt") + corpus.read_file(file_path) return corpus + # instannce of Qrmine as fixture @pytest.fixture def q(): from src.qrmine import Qrmine + _q = Qrmine() return _q + +@pytest.fixture +def cluster(): + from src.qrmine import ClusterDocs + + _cluster = ClusterDocs() + return _cluster + + # Ref: https://docs.pytest.org/en/latest/capture.html def test_generate_dict(corpus_fixture, capsys, q): from src.qrmine import Content + num = 10 all_interviews = Content(corpus_fixture.content) q.print_dict(all_interviews, num) captured = capsys.readouterr() print(captured.out) - assert 'code' in captured.out + assert "code" in captured.out + def test_generate_topics(corpus_fixture, capsys, q): q.content = corpus_fixture @@ -34,22 +47,53 @@ def test_generate_topics(corpus_fixture, capsys, q): q.print_topics() captured = capsys.readouterr() print(captured.out) - assert 'TOPIC' in captured.out + assert "TOPIC" in captured.out + def test_category_basket(corpus_fixture, capsys, q): q.content = corpus_fixture print(q.category_basket()) captured = capsys.readouterr() print(captured.out) - assert 'theory' in captured.out + assert "theory" in captured.out + def test_category_association(corpus_fixture, capsys, q): q.content = corpus_fixture print(q.category_association()) captured = capsys.readouterr() print(captured.out) - assert 'theory' in captured.out + assert "theory" in captured.out + +def test_cluster_topics(corpus_fixture, capsys, cluster): + cluster.documents = corpus_fixture.documents + cluster.titles = corpus_fixture.titles + cluster.print_clusters() + captured = capsys.readouterr() + print(captured.out) + assert "Document" in captured.out + cluster.print_topics() + captured = capsys.readouterr() + print(captured.out) + assert "topic" in captured.out + print(cluster.build_lda_model()) + print(cluster.topics_per_document()) + # Format + 
df_dominant_topic = cluster.format_topics_sentences() + # Format the output + df_dominant_topic.columns = [ + "Document_No", + "Dominant_Topic", + "Topic_Perc_Contrib", + "Keywords", + "Text", + ] + print(df_dominant_topic.head(10)) + assert "Document_No" in df_dominant_topic.columns + df_sorted = cluster.most_representative_docs() + print(df_sorted.head(10)) + assert "Dominant_Topic" in df_sorted.columns diff --git a/tests/test_num.py b/tests/test_num.py index f0c53cd..ac7a139 100644 --- a/tests/test_num.py +++ b/tests/test_num.py @@ -9,7 +9,7 @@ def ml_fixture(): ml = MLQRMine() file_path = resource_filename('src.qrmine.resources', 'numeric.csv') ml.csvfile = file_path - return ml + return ml @@ -19,7 +19,7 @@ def test_nn(ml_fixture, capsys): ml_fixture.prepare_data(True) ml_fixture.get_nnet_predictions() captured = capsys.readouterr() - assert 'accuracy' in captured.out + assert 'Accuracy' in captured.out def test_svm(ml_fixture, capsys): ml_fixture.prepare_data(True) diff --git a/tests/test_readfiles.py b/tests/test_readfiles.py index aff3a5d..963ed90 100644 --- a/tests/test_readfiles.py +++ b/tests/test_readfiles.py @@ -8,8 +8,8 @@ def corpus_fixture(): from src.qrmine import ReadData corpus = ReadData() file_path = resource_filename('src.qrmine.resources', 'interview.txt') - corpus.read_file([file_path]) - return corpus + corpus.read_file(file_path) + return corpus def test_content(corpus_fixture): diff --git a/tests/test_visualize.py b/tests/test_visualize.py new file mode 100644 index 0000000..41f7145 --- /dev/null +++ b/tests/test_visualize.py @@ -0,0 +1,114 @@ +import pytest +import pandas as pd +from src.qrmine.visualize import QRVisualize + + +@pytest.fixture +def v(): + from pkg_resources import resource_filename + + file_path = resource_filename("src.qrmine.resources", "df_dominant_topic.csv") + data = pd.read_csv(file_path) + _v = QRVisualize(data) + return _v + + +@pytest.fixture +def topics(): + return [ + ( + 0, + [ + (".", 0.095292516), + (",", 0.053392828), + ("category", 0.032462463), + ("coding", 0.032456465), + ("open", 0.032437164), + ("QRMine", 0.03243305), + ("datum", 0.021980358), + ("researcher", 0.021978099), + ("theory", 0.011536299), + ("GT", 0.011533132), + ], + ), + ( + 1, + [ + (".", 0.007783216), + (",", 0.007773952), + ("open", 0.007728422), + ("researcher", 0.0077227736), + ("coding", 0.007722049), + ("category", 0.007721938), + ("datum", 0.007717547), + ("QRMine", 0.007716193), + ("dissect", 0.0077070068), + ("support", 0.0077060354), + ], + ), + ( + 2, + [ + (",", 0.05126711), + (".", 0.05125151), + ("theory", 0.038604487), + ("category", 0.03227912), + ("GT", 0.032278605), + ("\n", 0.029119665), + ("comparison", 0.025947908), + ("coding", 0.025941858), + ("incident", 0.019622542), + (")", 0.019619444), + ], + ), + ( + 3, + [ + (".", 0.007849805), + (",", 0.007837688), + ("theory", 0.00781459), + ("coding", 0.0078089647), + ("category", 0.0077514737), + ("GT", 0.0077493717), + ("datum", 0.007742789), + ("open", 0.0077355755), + ("\n", 0.0077245855), + ("researcher", 0.0077191954), + ], + ), + ( + 4, + [ + (",", 0.007834569), + (".", 0.007812336), + ("coding", 0.0077863215), + ("category", 0.007759207), + ("theory", 0.0077459146), + ("GT", 0.0077370973), + ("code", 0.0077265715), + ("datum", 0.007720947), + ("open", 0.007720898), + ("comparison", 0.007720567), + ], + ), + ] + + +def test_frequency_distribution_of_words(v, capsys): + v.plot_frequency_distribution_of_words( + v.data + ) + captured = capsys.readouterr() + print(captured.out) + + +def 
test_distribution_by_topic(v, capsys): + v.plot_distribution_by_topic(v.data) + captured = capsys.readouterr() + print(captured.out) + + +def test_plot_wordcloud(v, topics, capsys): + v.plot_wordcloud(topics) + captured = capsys.readouterr() + print(captured.out) diff --git a/tox.ini b/tox.ini index 3eb707d..dbb293d 100644 --- a/tox.ini +++ b/tox.ini @@ -8,9 +8,6 @@ envlist = py311, integration [testenv] setenv = TOXINIDIR = {toxinidir} -deps = - -rrequirements.txt - -rdev-requirements.txt commands = python -m spacy download en_core_web_sm py.test {posargs} @@ -20,9 +17,6 @@ extras = [testenv:integration] setenv = TOXINIDIR = {toxinidir} -deps = - -rrequirements.txt - -rdev-requirements.txt commands = python -m spacy download en_core_web_sm python qrminer.py \ No newline at end of file
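
Reviewer notes — a few usage sketches for the APIs this changeset touches. Everything below is illustrative rather than part of the diff: the file names, URLs, and sample strings are hypothetical.

`ReadData.read_file` now takes a single argument instead of a list of file names. With the `os.path.isfile` / `os.path.isdir` / URL-prefix dispatch above, it covers three input shapes:

```python
from qrmine import ReadData

# 1. A single transcript, sections delimited by <break>Title</break> tags
corpus = ReadData()
corpus.read_file("interview.txt")  # hypothetical path

# 2. A folder: each .txt or .pdf inside becomes one document,
#    titled with its file name (PDF text is extracted page by page via pypdf)
corpus = ReadData()
corpus.read_file("transcripts/")  # hypothetical folder

# 3. A URL: the fetched page body becomes a single document
corpus = ReadData()
corpus.read_file("https://example.com/interview.txt")

print(corpus.titles)
print(len(corpus.documents))
```

A fresh `ReadData` is used per call because `_content` and `_documents` accumulate across sources.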
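A minimal sketch of the new `ClusterDocs` wrapper in `src/qrmine/cluster.py`, assuming `en_core_web_sm` is installed (the constructor loads it unconditionally); the documents here are toy strings:

```python
from qrmine import ClusterDocs

docs = [
    "Open coding breaks data into conceptually similar chunks.",
    "Constant comparison refines categories until a theory emerges.",
    "Grounded theory emerged from medical sociology.",
]
cluster = ClusterDocs(docs, titles=["d1", "d2", "d3"])
cluster.num_topics = 2  # set before building; defaults to 5
cluster.passes = 10     # defaults to 15

topics = cluster.build_lda_model()      # show_topics(formatted=False) pairs
cluster.print_topics(num_words=5)       # pretty-printed topic keywords
cluster.print_clusters()                # per-document topic distribution

df = cluster.format_topics_sentences()  # one row per document
df.columns = ["Document_No", "Dominant_Topic", "Topic_Perc_Contrib", "Keywords", "Text"]
print(cluster.most_representative_docs())
```

The column rename mirrors `tests/test_nlp.py`, and the bundled `src/qrmine/resources/df_dominant_topic.csv` looks like saved output of this same pipeline. Note that `num_topics` and `passes` only take effect before the first `build_lda_model()` call, since the model is cached.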
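`MLQRMine` drops Keras; `get_nnet_predictions` now trains the small PyTorch `NeuralNet` defined at the top of `mlqrmine.py`. Call order matters, because `get_nnet_scores` evaluates the model that `get_nnet_predictions` trained. A sketch against a hypothetical CSV whose last column is a binary label (like `resources/numeric.csv` in the tests):

```python
from qrmine import MLQRMine

ml = MLQRMine()
ml.csvfile = "numeric.csv"        # hypothetical path
ml.prepare_data(oversample=True)  # read the CSV and oversample the minority class

rounded = ml.get_nnet_predictions()  # trains the net, prints "Accuracy: ..."
ml.get_nnet_scores()                 # re-evaluates the trained net on X/y
```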
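`QRVisualize` methods plot `self.data` by default and save the figure when a `folder_path` is given. A sketch driven by the CSV resource added in this PR; the `pkg_resources` accessor mirrors the existing tests, though it is deprecated in newer setuptools:

```python
import pandas as pd
from pkg_resources import resource_filename
from qrmine import QRVisualize

file_path = resource_filename("qrmine.resources", "df_dominant_topic.csv")
v = QRVisualize(pd.read_csv(file_path))

# Histogram of document word counts across the corpus
v.plot_frequency_distribution_of_words()

# Word-count histograms split by dominant topic (expects a Dominant_Topic column)
v.plot_distribution_by_topic()

# Word clouds and keyword-importance plots need LDA topics, e.g.:
# v.plot_wordcloud(cluster.build_lda_model())  # needs at least 4 topics (2x2 grid)
```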