diff --git a/.gitignore b/.gitignore index b8ab5703..c1750078 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ docs/source/_templates/ docs/source/_static/ .env venv/ +.venv/ .vscode/ # exclude pdf, mp3 @@ -28,9 +29,12 @@ venv/ *.mp3 *.sqlite *.google-cookie +*.python-version examples/graph_examples/ScrapeGraphAI_generated_graph examples/**/result.csv examples/**/result.json main.py +lib/ +*.html +.idea - \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 43c5aeb4..338d488f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,83 @@ -## [1.4.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.3.2...v1.4.0) (2024-05-22) +## [1.5.0-beta.5](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.5.0-beta.4...v1.5.0-beta.5) (2024-05-26) + + +### Features + +* **version:** python 3.12 is now supported 🚀 ([5fb9115](https://github.com/VinciGit00/Scrapegraph-ai/commit/5fb9115330141ac2c1dd97490284d4f1fa2c01c3)) + + +### Docs + +* **faq:** added faq section and refined installation ([545374c](https://github.com/VinciGit00/Scrapegraph-ai/commit/545374c17e9101a240fd1fbc380ce813c5aa6c2e)) +* updated requirements ([e43b801](https://github.com/VinciGit00/Scrapegraph-ai/commit/e43b8018f5f360b88c52e45ff4e1b4221386ea8e)) + +## [1.5.0-beta.4](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.5.0-beta.3...v1.5.0-beta.4) (2024-05-25) + + +### Features + +* **burr:** added burr integration in graphs and optional burr installation ([ac10128](https://github.com/VinciGit00/Scrapegraph-ai/commit/ac10128ff3af35c52b48c79d085e458524e8e48a)) +* **burr-bridge:** BurrBridge class to integrate inside BaseGraph ([6cbd84f](https://github.com/VinciGit00/Scrapegraph-ai/commit/6cbd84f254ebc1f1c68699273bdd8fcdb0fe26d4)) +* **burr:** first burr integration and docs ([19b27bb](https://github.com/VinciGit00/Scrapegraph-ai/commit/19b27bbe852f134cf239fc1945e7906bc24d7098)) +* **burr-node:** working burr bridge ([654a042](https://github.com/VinciGit00/Scrapegraph-ai/commit/654a04239640a89d9fa408ccb2e4485247ab84df)) + + +### Docs + +* **burr:** added dependecies and switched to furo ([819f071](https://github.com/VinciGit00/Scrapegraph-ai/commit/819f071f2dc64d090cb05c3571aff6c9cb9196d7)) +* **graph:** added new graphs and schema ([d27cad5](https://github.com/VinciGit00/Scrapegraph-ai/commit/d27cad591196b932c1bbcbaa936479a030ac67b5)) + +## [1.5.0-beta.3](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.5.0-beta.2...v1.5.0-beta.3) (2024-05-24) + + +### Bug Fixes + +* **kg:** removed unused nodes and utils ([5684578](https://github.com/VinciGit00/Scrapegraph-ai/commit/5684578fab635e862de58f7847ad736c6a57f766)) + +## [1.5.0-beta.2](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.5.0-beta.1...v1.5.0-beta.2) (2024-05-24) + + +### Bug Fixes + +* **pdf_scraper:** fix the pdf scraper gaph ([d00cde6](https://github.com/VinciGit00/Scrapegraph-ai/commit/d00cde60309935e283ba9116cf0b114e53cb9640)) +* **local_file:** fixed textual input pdf, csv, json and xml graph ([8d5eb0b](https://github.com/VinciGit00/Scrapegraph-ai/commit/8d5eb0bb0d5d008a63a96df94ce3842320376b8e)) + +## [1.5.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.4.0...v1.5.0-beta.1) (2024-05-24) + + +### Features + +* **knowledgegraph:** add knowledge graph node ([0196423](https://github.com/VinciGit00/Scrapegraph-ai/commit/0196423bdeea6568086aae6db8fc0f5652fc4e87)) +* add logger integration ([e53766b](https://github.com/VinciGit00/Scrapegraph-ai/commit/e53766b16e89254f945f9b54b38445a24f8b81f2)) +* **smart-scraper-multi:** add schema to graphs and created SmartScraperMultiGraph ([fc58e2d](https://github.com/VinciGit00/Scrapegraph-ai/commit/fc58e2d3a6f05efa72b45c9e68c6bb41a1eee755)) +* **base_graph:** alligned with main ([73fa31d](https://github.com/VinciGit00/Scrapegraph-ai/commit/73fa31db0f791d1fd63b489ac88cc6e595aa07f9)) +* **verbose:** centralized graph logging on debug or warning depending on verbose ([c807695](https://github.com/VinciGit00/Scrapegraph-ai/commit/c807695720a85c74a0b4365afb397bbbcd7e2889)) +* **node:** knowledge graph node ([8c33ea3](https://github.com/VinciGit00/Scrapegraph-ai/commit/8c33ea3fbce18f74484fe7bd9469ab95c985ad0b)) +* **multiple:** quick fix working ([58cc903](https://github.com/VinciGit00/Scrapegraph-ai/commit/58cc903d556d0b8db10284493b05bed20992c339)) +* **kg:** removed import ([a338383](https://github.com/VinciGit00/Scrapegraph-ai/commit/a338383399b669ae2dd7bfcec168b791e8206816)) +* **docloaders:** undetected-playwright ([7b3ee4e](https://github.com/VinciGit00/Scrapegraph-ai/commit/7b3ee4e71e4af04edeb47999d70d398b67c93ac4)) +* **multiple_search:** working multiple example ([bed3eed](https://github.com/VinciGit00/Scrapegraph-ai/commit/bed3eed50c1678cfb07cba7b451ac28d38c87d7c)) +* **kg:** working rag kg ([c75e6a0](https://github.com/VinciGit00/Scrapegraph-ai/commit/c75e6a06b1a647f03e6ac6eeacdc578a85baa25b)) + + +### Bug Fixes + +* error in jsons ([ca436ab](https://github.com/VinciGit00/Scrapegraph-ai/commit/ca436abf3cbff21d752a71969e787e8f8c98c6a8)) +* **logger:** set up centralized root logger in base node ([4348d4f](https://github.com/VinciGit00/Scrapegraph-ai/commit/4348d4f4db6f30213acc1bbccebc2b143b4d2636)) +* **logging:** source code citation ([d139480](https://github.com/VinciGit00/Scrapegraph-ai/commit/d1394809d704bee4085d494ddebab772306b3b17)) +* template names ([b82f33a](https://github.com/VinciGit00/Scrapegraph-ai/commit/b82f33aee72515e4258e6f508fce15028eba5cbe)) +* **node-logging:** use centralized logger in each node for logging ([c251cc4](https://github.com/VinciGit00/Scrapegraph-ai/commit/c251cc45d3694f8e81503e38a6d2b362452b740e)) +* **web-loader:** use sublogger ([0790ecd](https://github.com/VinciGit00/Scrapegraph-ai/commit/0790ecd2083642af9f0a84583216ababe351cd76)) + + +### CI + +* **release:** 1.2.0-beta.1 [skip ci] ([fd3e0aa](https://github.com/VinciGit00/Scrapegraph-ai/commit/fd3e0aa5823509dfb46b4f597521c24d4eb345f1)) +* **release:** 1.3.0-beta.1 [skip ci] ([191db0b](https://github.com/VinciGit00/Scrapegraph-ai/commit/191db0bc779e4913713b47b68ec4162a347da3ea)) +* **release:** 1.4.0-beta.1 [skip ci] ([2caddf9](https://github.com/VinciGit00/Scrapegraph-ai/commit/2caddf9a99b5f3aedc1783216f21d23cd35b3a8c)) +* **release:** 1.4.0-beta.2 [skip ci] ([f1a2523](https://github.com/VinciGit00/Scrapegraph-ai/commit/f1a25233d650010e1932e0ab80938079a22a296d)) + +## [1.4.0-beta.2](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.4.0-beta.1...v1.4.0-beta.2) (2024-05-19) ### Features @@ -19,13 +98,16 @@ * add deepseek embeddings ([659fad7](https://github.com/VinciGit00/Scrapegraph-ai/commit/659fad770a5b6ace87511513e5233a3bc1269009)) + ## [1.3.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.2.4...v1.3.0) (2024-05-19) + ### Features * add new model ([8c7afa7](https://github.com/VinciGit00/Scrapegraph-ai/commit/8c7afa7570f0a104578deb35658168435cfe5ae1)) + ## [1.2.4](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.2.3...v1.2.4) (2024-05-17) diff --git a/README.md b/README.md index 32a12121..b190f125 100644 --- a/README.md +++ b/README.md @@ -22,10 +22,6 @@ The reference page for Scrapegraph-ai is available on the official page of pypy: ```bash pip install scrapegraphai ``` -you will also need to install Playwright for javascript-based scraping: -```bash -playwright install -``` **Note**: it is recommended to install the library in a virtual environment to avoid conflicts with other libraries 🐱 @@ -49,6 +45,7 @@ There are three main scraping pipelines that can be used to extract information - `SmartScraperGraph`: single-page scraper that only needs a user prompt and an input source; - `SearchGraph`: multi-page scraper that extracts information from the top n search results of a search engine; - `SpeechGraph`: single-page scraper that extracts information from a website and generates an audio file. +- `SmartScraperMultiGraph`: multiple page scraper given a single prompt It is possible to use different LLM through APIs, such as **OpenAI**, **Groq**, **Azure** and **Gemini**, or local models using **Ollama**. @@ -171,7 +168,7 @@ Feel free to contribute and join our Discord server to discuss with us improveme Please see the [contributing guidelines](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/CONTRIBUTING.md). -[](https://discord.gg/gkxQDAjfeX) +[](https://discord.gg/uJN7TYcpNa) [](https://www.linkedin.com/company/scrapegraphai/) [](https://twitter.com/scrapegraphai) @@ -182,13 +179,14 @@ Wanna visualize the roadmap in a more interactive way? Check out the [markmap](h ## ❤️ Contributors [](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) + ## Sponsors
diff --git a/docs/source/conf.py b/docs/source/conf.py index a64cfb33..43c849c4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -23,7 +23,7 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon','sphinx_wagtail_theme'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon'] templates_path = ['_templates'] exclude_patterns = [] @@ -31,19 +31,9 @@ # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -# html_theme = 'sphinx_rtd_theme' -html_theme = 'sphinx_wagtail_theme' - -html_theme_options = dict( - project_name = "ScrapeGraphAI", - logo = "scrapegraphai_logo.png", - logo_alt = "ScrapeGraphAI", - logo_height = 59, - logo_url = "https://scrapegraph-ai.readthedocs.io/en/latest/", - logo_width = 45, - github_url = "https://github.com/VinciGit00/Scrapegraph-ai/tree/main/docs/source/", - footer_links = ",".join( - ["Landing Page|https://scrapegraphai.com/", - "Docusaurus|https://scrapegraph-doc.onrender.com/docs/intro"] - ), -) +html_theme = 'furo' +html_theme_options = { + "source_repository": "https://github.com/VinciGit00/Scrapegraph-ai/", + "source_branch": "main", + "source_directory": "docs/source/", +} \ No newline at end of file diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst index 55a7361d..4cbf7360 100644 --- a/docs/source/getting_started/installation.rst +++ b/docs/source/getting_started/installation.rst @@ -25,11 +25,18 @@ The library is available on PyPI, so it can be installed using the following com It is higly recommended to install the library in a virtual environment (conda, venv, etc.) -If your clone the repository, you can install the library using `poetry