From 4d936410ccaa3a4b810065e0e84b49b15c09fb28 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Fri, 28 Jun 2024 10:50:01 +0200 Subject: [PATCH 1/7] fix: add new claude model --- scrapegraphai/helpers/models_tokens.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 7af44531..867c8669 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -82,7 +82,8 @@ "claude2": 9000, "claude2.1": 200000, "claude3": 200000, - "claude3.5": 200000 + "claude3.5": 200000, + "claude-3-haiku-20240307": 200000 }, "bedrock": { "anthropic.claude-3-haiku-20240307-v1:0": 200000, From e40d10443fb3e6bb59b03c6c387d6ff3a588a04a Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 28 Jun 2024 08:51:17 +0000 Subject: [PATCH 2/7] ci(release): 1.7.5 [skip ci] ## [1.7.5](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.7.5) (2024-06-28) ### Bug Fixes * add new claude model ([4d93641](https://github.com/VinciGit00/Scrapegraph-ai/commit/4d936410ccaa3a4b810065e0e84b49b15c09fb28)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f359d94..cb34bce4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.7.5](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.7.5) (2024-06-28) + + +### Bug Fixes + +* add new claude model ([4d93641](https://github.com/VinciGit00/Scrapegraph-ai/commit/4d936410ccaa3a4b810065e0e84b49b15c09fb28)) + ## [1.7.4](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.3...v1.7.4) (2024-06-21) diff --git a/pyproject.toml b/pyproject.toml index f9df8d3e..1a65ac7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "scrapegraphai" -version = "1.7.4" +version = "1.7.5" From 5d2e592a672d4c8f2229d43f64a2676a12cbbe34 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra 
Date: Fri, 28 Jun 2024 10:53:40 +0200 Subject: [PATCH 3/7] add new claude models --- scrapegraphai/helpers/models_tokens.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 867c8669..6f1c7f8b 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -83,7 +83,10 @@ "claude2.1": 200000, "claude3": 200000, "claude3.5": 200000, - "claude-3-haiku-20240307": 200000 + "claude-3-opus-20240229": 200000, + "claude-3-sonnet-20240229": 200000, + "claude-3-haiku-20240307": 200000, + "claude-3-5-sonnet-20240620": 200000 }, "bedrock": { "anthropic.claude-3-haiku-20240307-v1:0": 200000, From cb3cb763262450cf971bcc8348c76d4054bd0a4a Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sun, 30 Jun 2024 15:00:03 +0000 Subject: [PATCH 4/7] ci(release): 1.8.0 [skip ci] ## [1.8.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.5...v1.8.0) (2024-06-30) ### Features * add new search engine avaiability and new tests ([073d226](https://github.com/VinciGit00/Scrapegraph-ai/commit/073d226723f5f03b960865d07408905b7a506180)) * add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14)) ### Bug Fixes * updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f)) ### CI * **release:** 1.7.0-beta.13 [skip ci] ([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822)) * **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6)) * **release:** 1.8.0-beta.1 [skip ci] ([bbfbbd9](https://github.com/VinciGit00/Scrapegraph-ai/commit/bbfbbd93be3c87c5f25e3c75ec7d677832d37467)) --- CHANGELOG.md | 20 ++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 21 
insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5854cf8a..e84fae1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,23 @@ +## [1.8.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.5...v1.8.0) (2024-06-30) + + +### Features + +* add new search engine avaiability and new tests ([073d226](https://github.com/VinciGit00/Scrapegraph-ai/commit/073d226723f5f03b960865d07408905b7a506180)) +* add research with bing + test function ([aa2160c](https://github.com/VinciGit00/Scrapegraph-ai/commit/aa2160c108764745a696ffc16038f370e9702c14)) + + +### Bug Fixes + +* updated for schema changes ([aedda44](https://github.com/VinciGit00/Scrapegraph-ai/commit/aedda448682ce5a921a62e661bffb02478bab75f)) + + +### CI + +* **release:** 1.7.0-beta.13 [skip ci] ([ce0a47a](https://github.com/VinciGit00/Scrapegraph-ai/commit/ce0a47aee5edbb26fd82e41f6688a4bc48a10822)) +* **release:** 1.7.0-beta.14 [skip ci] ([ec77ff7](https://github.com/VinciGit00/Scrapegraph-ai/commit/ec77ff7ea4eb071469c2fb53e5959d4ea1f73ad6)) +* **release:** 1.8.0-beta.1 [skip ci] ([bbfbbd9](https://github.com/VinciGit00/Scrapegraph-ai/commit/bbfbbd93be3c87c5f25e3c75ec7d677832d37467)) + ## [1.8.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.4...v1.8.0-beta.1) (2024-06-25) diff --git a/pyproject.toml b/pyproject.toml index 8d1b6f68..a4f6b2be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "scrapegraphai" -version = "1.8.0b1" +version = "1.8.0" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
From 3e644f498f05eb505fbd4e94b144c81567569aaa Mon Sep 17 00:00:00 2001 From: Marco Perini Date: Mon, 1 Jul 2024 10:56:28 +0200 Subject: [PATCH 5/7] docs(roadmap): next steps --- README.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 977243e3..2760ab60 100644 --- a/README.md +++ b/README.md @@ -191,9 +191,32 @@ Please see the [contributing guidelines](https://github.com/VinciGit00/Scrapegra [![My Skills](https://skillicons.dev/icons?i=twitter)](https://twitter.com/scrapegraphai) ## 📈 Roadmap -Check out the project roadmap [here](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/README.md)! 🚀 -Wanna visualize the roadmap in a more interactive way? Check out the [markmap](https://markmap.js.org/repl) visualization by copy pasting the markdown content in the editor! +We are working on the following features! If you are interested in collaborating click on the feature and file a PR. If you have doubts and wanna discuss them with us, just contact us on [discord](https://discord.gg/uJN7TYcpNa) or open a [Discussion](https://github.com/VinciGit00/Scrapegraph-ai/discussions) here on Github! 
+ +```mermaid +%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#5C4B9B', 'edgeLabelBackground':'#ffffff', 'tertiaryColor': '#ffffff', 'primaryBorderColor': '#5C4B9B', 'fontFamily': 'Arial', 'fontSize': '16px', 'textColor': '#5C4B9B' }}}%% +graph LR + A[DeepSearch Graph] --> F[Use Existing Chromium Instances] + F --> B[Page Caching] + B --> C[Screenshot Scraping] + C --> D[Handle Dynamic Content] + D --> E[New Webdrivers] + + style A fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style F fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style B fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style C fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style D fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + style E fill:#ffffff,stroke:#5C4B9B,stroke-width:2px,rx:10,ry:10 + + click A href "https://github.com/VinciGit00/Scrapegraph-ai/issues/260" "Open DeepSearch Graph Issue" + click F href "https://github.com/VinciGit00/Scrapegraph-ai/issues/329" "Open Chromium Instances Issue" + click B href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Page Caching Issue" + click C href "https://github.com/VinciGit00/Scrapegraph-ai/issues/197" "Open Screenshot Scraping Issue" + click D href "https://github.com/VinciGit00/Scrapegraph-ai/issues/279" "Open Handle Dynamic Content Issue" + click E href "https://github.com/VinciGit00/Scrapegraph-ai/issues/171" "Open New Webdrivers Issue" +``` ## ❤️ Contributors [![Contributors](https://contrib.rocks/image?repo=VinciGit00/Scrapegraph-ai)](https://github.com/VinciGit00/Scrapegraph-ai/graphs/contributors) From 14faba4f00dd9f947f8dc5e0b51be49ea684179f Mon Sep 17 00:00:00 2001 From: Marco Perini Date: Mon, 1 Jul 2024 11:01:52 +0200 Subject: [PATCH 6/7] docs(roadmap): fix urls --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2760ab60..11def085 100644 --- a/README.md +++ b/README.md @@ -192,7 +192,7 @@ 
Please see the [contributing guidelines](https://github.com/VinciGit00/Scrapegra ## 📈 Roadmap -We are working on the following features! If you are interested in collaborating click on the feature and file a PR. If you have doubts and wanna discuss them with us, just contact us on [discord](https://discord.gg/uJN7TYcpNa) or open a [Discussion](https://github.com/VinciGit00/Scrapegraph-ai/discussions) here on Github! +We are working on the following features! If you are interested in collaborating, right-click on the feature and open it in a new tab to file a PR. If you have doubts and want to discuss them with us, just contact us on [Discord](https://discord.gg/uJN7TYcpNa) or open a [Discussion](https://github.com/VinciGit00/Scrapegraph-ai/discussions) here on GitHub! ```mermaid %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#5C4B9B', 'edgeLabelBackground':'#ffffff', 'tertiaryColor': '#ffffff', 'primaryBorderColor': '#5C4B9B', 'fontFamily': 'Arial', 'fontSize': '16px', 'textColor': '#5C4B9B' }}}%% From 3f9bf5b4d4384e642d0fce84cd188936c2b30ca7 Mon Sep 17 00:00:00 2001 From: AmosDinh <39965380+AmosDinh@users.noreply.github.com> Date: Mon, 1 Jul 2024 13:42:10 +0000 Subject: [PATCH 7/7] fixed typo --- scrapegraphai/nodes/search_internet_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 59c56975..97fed67b 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -84,7 +84,7 @@ def execute(self, state: dict) -> dict: You should return only the query string without any additional sentences. \n For example, if the user prompt is "What is the capital of France?", you should return "capital of France". \n - If yuo return something else, you will get a really bad grade. \n + If you return something else, you will get a really bad grade. 
\n USER PROMPT: {user_prompt}""" search_prompt = PromptTemplate(