diff --git a/CHANGELOG.md b/CHANGELOG.md index a69adb32..e84fae1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,23 @@ +## [1.8.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.5...v1.8.0) (2024-06-30) + + +### Features + +* add new search engine availability and new tests ([073d226](https://github.com/VinciGit00/Scrapegraph-ai/commit/073d226723f5f03b960865d07408905b7a506180)) +* add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14)) + + +### Bug Fixes + +* updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f)) + + +### CI + +* **release:** 1.7.0-beta.13 [skip ci] ([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822)) +* **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6)) +* **release:** 1.8.0-beta.1 [skip ci] ([bbfbbd9](https://github.com/VinciGit00/Scrapegraph-ai/commit/bbfbbd93be3c87c5f25e3c75ec7d677832d37467)) + ## [1.8.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.8.0-beta.1) (2024-06-25) @@ -7,6 +27,7 @@ * add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14)) + ### Bug Fixes * updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f)) @@ -17,6 +38,7 @@ * **release:** 1.7.0-beta.13 [skip ci] ([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822)) * **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6)) + ## [1.7.4](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.3...v1.7.4) (2024-06-21) diff --git 
a/README.md b/README.md index 977243e3..11def085 100644 --- a/README.md +++ b/README.md @@ -191,9 +191,32 @@ Please see the [contributing guidelines](https://github.com/VinciGit00/Scrapegra [![My Skills](https://skillicons.dev/icons?i=twitter)](https://twitter.com/scrapegraphai) ## 📈 Roadmap -Check out the project roadmap [here](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/README.md)! 🚀 -Wanna visualize the roadmap in a more interactive way? Check out the [markmap](https://markmap.js.org/repl) visualization by copy pasting the markdown content in the editor! +We are working on the following features! If you are interested in collaborating, right-click on a feature and open it in a new tab to file a PR. If you have questions and want to discuss them with us, just contact us on [Discord](https://discord.gg/uJN7TYcpNa) or open a [Discussion](https://github.com/VinciGit00/Scrapegraph-ai/discussions) here on GitHub! + +```mermaid +%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#5C4B9B', 'edgeLabelBackground':'#ffffff', 'tertiaryColor': '#ffffff', 'primaryBorderColor': '#5C4B9B', 'fontFamily': 'Arial', 'fontSize': '16px', 'textColor': '#5C4B9B' }}}%% +graph LR + A[DeepSearch Graph] --> F[Use Existing Chromium Instances] + F --> B[Page Caching] + B --> C[Screenshot Scraping] + C --> D[Handle Dynamic Content] + D --> E[New Webdrivers] + + style A fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style F fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style B fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style C fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style D fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style E fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + + click A href "https://github.com/VinciGit00/Scrapegraph-ai/issues/260" "Open DeepSearch Graph Issue" + click F href "https://github.com/VinciGit00/Scrapegraph-ai/issues/329" "Open Chromium Instances Issue" + click B 
href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Page Caching Issue" + click C href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Screenshot Scraping Issue" + click D href "https://github.com/VinciGit00/Scrapegraph-ai/issues/279" "Open Handle Dynamic Content Issue" + click E href "https://github.com/VinciGit00/Scrapegraph-ai/issues/171" "Open New Webdrivers Issue" +``` ## ❤️ Contributors [![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) diff --git a/pyproject.toml b/pyproject.toml index 0df19e6f..a4f6b2be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,7 @@ name = "scrapegraphai" -version = "1.8.0b1" - +version = "1.8.0" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 7af44531..6f1c7f8b 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -82,7 +82,11 @@ "claude2": 9000, "claude2.1": 200000, "claude3": 200000, - "claude3.5": 200000 + "claude3.5": 200000, + "claude-3-opus-20240229": 200000, + "claude-3-sonnet-20240229": 200000, + "claude-3-haiku-20240307": 200000, + "claude-3-5-sonnet-20240620": 200000 }, "bedrock": { "anthropic.claude-3-haiku-20240307-v1:0": 200000, diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 59c56975..97fed67b 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -84,7 +84,7 @@ def execute(self, state: dict) -> dict: You should return only the query string without any additional sentences. \n For example, if the user prompt is "What is the capital of France?", you should return "capital of France". \n - If yuo return something else, you will get a really bad grade. 
\n + If you return something else, you will get a really bad grade. \n USER PROMPT: {user_prompt}""" search_prompt = PromptTemplate(