From e3fbf01c5b48a28878a541df628e9703f249db7d Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra <88108002+VinciGit00@users.noreply.github.com> Date: Sun, 25 Aug 2024 10:54:19 +0200 Subject: [PATCH 01/21] Revert "Anthropic refactoring" --- ...multi_anthropic.py => csv_scraper_graph_multi_haiku.py} | 2 +- .../{csv_scraper_anthropic.py => csv_scraper_haiku.py} | 2 +- .../{custom_graph_anthropic.py => custom_graph_haiku.py} | 2 +- .../{json_scraper_anthropic.py => json_scraper_haiku.py} | 2 +- ...aper_multi_anthropic.py => json_scraper_multi_haiku.py} | 2 +- ...raper_graph_anthropic.py => pdf_scraper_graph_haiku.py} | 2 +- ...raper_multi_anthropic.py => pdf_scraper_multi_haiku.py} | 2 +- ..._plain_text_anthropic.py => scrape_plain_text_haiku.py} | 2 +- ...pt_generator_anthropic.py => script_generator_haiku.py} | 2 +- ...erator_anthropic.py => script_multi_generator_haiku.py} | 2 +- .../{search_graph_anthropic.py => search_graph_haiku.py} | 2 +- ...ph_schema_anthropic.py => search_graph_schema_haiku.py} | 5 ++--- ..._link_graph_anthropic.py => search_link_graph_haiku.py} | 7 ++----- .../{smart_scraper_anthropic.py => smart_scraper_haiku.py} | 2 +- ...per_multi_anthropic.py => smart_scraper_multi_haiku.py} | 2 +- ...r_schema_anthropic.py => smart_scraper_schema_haiku.py} | 2 +- ...multi_anthropic.py => xml_scraper_graph_multi_haiku.py} | 2 +- .../{xml_scraper_anthropic.py => xml_scraper_haiku.py} | 2 +- scrapegraphai/graphs/abstract_graph.py | 7 +++---- 19 files changed, 23 insertions(+), 28 deletions(-) rename examples/anthropic/{csv_scraper_graph_multi_anthropic.py => csv_scraper_graph_multi_haiku.py} (96%) rename examples/anthropic/{csv_scraper_anthropic.py => csv_scraper_haiku.py} (96%) rename examples/anthropic/{custom_graph_anthropic.py => custom_graph_haiku.py} (97%) rename examples/anthropic/{json_scraper_anthropic.py => json_scraper_haiku.py} (96%) rename examples/anthropic/{json_scraper_multi_anthropic.py => json_scraper_multi_haiku.py} (93%) rename examples/anthropic/{pdf_scraper_graph_anthropic.py => pdf_scraper_graph_haiku.py} (96%) rename examples/anthropic/{pdf_scraper_multi_anthropic.py => pdf_scraper_multi_haiku.py} (99%) rename examples/anthropic/{scrape_plain_text_anthropic.py => scrape_plain_text_haiku.py} (96%) rename examples/anthropic/{script_generator_anthropic.py => script_generator_haiku.py} (95%) rename examples/anthropic/{script_multi_generator_anthropic.py => script_multi_generator_haiku.py} (96%) rename examples/anthropic/{search_graph_anthropic.py => search_graph_haiku.py} (95%) rename examples/anthropic/{search_graph_schema_anthropic.py => search_graph_schema_haiku.py} (92%) rename examples/anthropic/{search_link_graph_anthropic.py => search_link_graph_haiku.py} (91%) rename examples/anthropic/{smart_scraper_anthropic.py => smart_scraper_haiku.py} (96%) rename examples/anthropic/{smart_scraper_multi_anthropic.py => smart_scraper_multi_haiku.py} (96%) rename examples/anthropic/{smart_scraper_schema_anthropic.py => smart_scraper_schema_haiku.py} (96%) rename examples/anthropic/{xml_scraper_graph_multi_anthropic.py => xml_scraper_graph_multi_haiku.py} (96%) rename examples/anthropic/{xml_scraper_anthropic.py => xml_scraper_haiku.py} (96%) diff --git a/examples/anthropic/csv_scraper_graph_multi_anthropic.py b/examples/anthropic/csv_scraper_graph_multi_haiku.py similarity index 96% rename from examples/anthropic/csv_scraper_graph_multi_anthropic.py rename to examples/anthropic/csv_scraper_graph_multi_haiku.py index fcc297ab..b833af01 100644 --- a/examples/anthropic/csv_scraper_graph_multi_anthropic.py +++ b/examples/anthropic/csv_scraper_graph_multi_haiku.py @@ -26,7 +26,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000}, } diff --git a/examples/anthropic/csv_scraper_anthropic.py b/examples/anthropic/csv_scraper_haiku.py similarity index 96% rename from examples/anthropic/csv_scraper_anthropic.py rename to examples/anthropic/csv_scraper_haiku.py index 01a26a2b..2e0ebe81 100644 --- a/examples/anthropic/csv_scraper_anthropic.py +++ b/examples/anthropic/csv_scraper_haiku.py @@ -32,7 +32,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/custom_graph_anthropic.py b/examples/anthropic/custom_graph_haiku.py similarity index 97% rename from examples/anthropic/custom_graph_anthropic.py rename to examples/anthropic/custom_graph_haiku.py index 6d787484..cea14361 100644 --- a/examples/anthropic/custom_graph_anthropic.py +++ b/examples/anthropic/custom_graph_haiku.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/json_scraper_anthropic.py b/examples/anthropic/json_scraper_haiku.py similarity index 96% rename from examples/anthropic/json_scraper_anthropic.py rename to examples/anthropic/json_scraper_haiku.py index 05ee7fdf..2610b658 100644 --- a/examples/anthropic/json_scraper_anthropic.py +++ b/examples/anthropic/json_scraper_haiku.py @@ -26,7 +26,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/json_scraper_multi_anthropic.py b/examples/anthropic/json_scraper_multi_haiku.py similarity index 93% rename from examples/anthropic/json_scraper_multi_anthropic.py rename to examples/anthropic/json_scraper_multi_haiku.py index c07fc54f..0327673b 100644 --- a/examples/anthropic/json_scraper_multi_anthropic.py +++ b/examples/anthropic/json_scraper_multi_haiku.py @@ -11,7 +11,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/pdf_scraper_graph_anthropic.py b/examples/anthropic/pdf_scraper_graph_haiku.py similarity index 96% rename from examples/anthropic/pdf_scraper_graph_anthropic.py rename to examples/anthropic/pdf_scraper_graph_haiku.py index 3e4191a6..61be06b4 100644 --- a/examples/anthropic/pdf_scraper_graph_anthropic.py +++ b/examples/anthropic/pdf_scraper_graph_haiku.py @@ -14,7 +14,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/pdf_scraper_multi_anthropic.py b/examples/anthropic/pdf_scraper_multi_haiku.py similarity index 99% rename from examples/anthropic/pdf_scraper_multi_anthropic.py rename to examples/anthropic/pdf_scraper_multi_haiku.py index 0c842787..974dd2f8 100644 --- a/examples/anthropic/pdf_scraper_multi_anthropic.py +++ b/examples/anthropic/pdf_scraper_multi_haiku.py @@ -11,7 +11,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/scrape_plain_text_anthropic.py b/examples/anthropic/scrape_plain_text_haiku.py similarity index 96% rename from examples/anthropic/scrape_plain_text_anthropic.py rename to examples/anthropic/scrape_plain_text_haiku.py index 7ebf84da..d3f36638 100644 --- a/examples/anthropic/scrape_plain_text_anthropic.py +++ b/examples/anthropic/scrape_plain_text_haiku.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/script_generator_anthropic.py b/examples/anthropic/script_generator_haiku.py similarity index 95% rename from examples/anthropic/script_generator_anthropic.py rename to examples/anthropic/script_generator_haiku.py index 160987cc..889ce0b5 100644 --- a/examples/anthropic/script_generator_anthropic.py +++ b/examples/anthropic/script_generator_haiku.py @@ -16,7 +16,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/script_multi_generator_anthropic.py b/examples/anthropic/script_multi_generator_haiku.py similarity index 96% rename from examples/anthropic/script_multi_generator_anthropic.py rename to examples/anthropic/script_multi_generator_haiku.py index c4b3f09b..f7c69010 100644 --- a/examples/anthropic/script_multi_generator_anthropic.py +++ b/examples/anthropic/script_multi_generator_haiku.py @@ -16,7 +16,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, "library": "beautifulsoup" diff --git a/examples/anthropic/search_graph_anthropic.py b/examples/anthropic/search_graph_haiku.py similarity index 95% rename from examples/anthropic/search_graph_anthropic.py rename to examples/anthropic/search_graph_haiku.py index 4ae0e6b3..f90d7598 100644 --- a/examples/anthropic/search_graph_anthropic.py +++ b/examples/anthropic/search_graph_haiku.py @@ -15,7 +15,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/search_graph_schema_anthropic.py b/examples/anthropic/search_graph_schema_haiku.py similarity index 92% rename from examples/anthropic/search_graph_schema_anthropic.py rename to examples/anthropic/search_graph_schema_haiku.py index 58e1ca0f..c9e7a875 100644 --- a/examples/anthropic/search_graph_schema_anthropic.py +++ b/examples/anthropic/search_graph_schema_haiku.py @@ -27,9 +27,8 @@ class Dishes(BaseModel): graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "claude-3-haiku-20240307", + "max_tokens": 4000}, } # ************************************************ diff --git a/examples/anthropic/search_link_graph_anthropic.py b/examples/anthropic/search_link_graph_haiku.py similarity index 91% rename from examples/anthropic/search_link_graph_anthropic.py rename to examples/anthropic/search_link_graph_haiku.py index 4d671817..ccfbc1d2 100644 --- a/examples/anthropic/search_link_graph_anthropic.py +++ b/examples/anthropic/search_link_graph_haiku.py @@ -29,11 +29,8 @@ # ************************************************ graph_config = { - "llm": { - "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "llm": {"model_instance": llm_model_instance}, + "embeddings": {"model_instance": embedder_model_instance} } # ************************************************ diff --git a/examples/anthropic/smart_scraper_anthropic.py b/examples/anthropic/smart_scraper_haiku.py similarity index 96% rename from examples/anthropic/smart_scraper_anthropic.py rename to examples/anthropic/smart_scraper_haiku.py index 612363c7..f0bb2a57 100644 --- a/examples/anthropic/smart_scraper_anthropic.py +++ b/examples/anthropic/smart_scraper_haiku.py @@ -19,7 +19,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/smart_scraper_multi_anthropic.py b/examples/anthropic/smart_scraper_multi_haiku.py similarity index 96% rename from examples/anthropic/smart_scraper_multi_anthropic.py rename to examples/anthropic/smart_scraper_multi_haiku.py index 6e2af361..eb2001d4 100644 --- a/examples/anthropic/smart_scraper_multi_anthropic.py +++ b/examples/anthropic/smart_scraper_multi_haiku.py @@ -17,7 +17,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/examples/anthropic/smart_scraper_schema_anthropic.py b/examples/anthropic/smart_scraper_schema_haiku.py similarity index 96% rename from examples/anthropic/smart_scraper_schema_anthropic.py rename to examples/anthropic/smart_scraper_schema_haiku.py index 0a444923..83cedd2a 100644 --- a/examples/anthropic/smart_scraper_schema_anthropic.py +++ b/examples/anthropic/smart_scraper_schema_haiku.py @@ -33,7 +33,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000}, } diff --git a/examples/anthropic/xml_scraper_graph_multi_anthropic.py b/examples/anthropic/xml_scraper_graph_multi_haiku.py similarity index 96% rename from examples/anthropic/xml_scraper_graph_multi_anthropic.py rename to examples/anthropic/xml_scraper_graph_multi_haiku.py index 31b350c2..6b79f709 100644 --- a/examples/anthropic/xml_scraper_graph_multi_anthropic.py +++ b/examples/anthropic/xml_scraper_graph_multi_haiku.py @@ -26,7 +26,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000}, } diff --git a/examples/anthropic/xml_scraper_anthropic.py b/examples/anthropic/xml_scraper_haiku.py similarity index 96% rename from examples/anthropic/xml_scraper_anthropic.py rename to examples/anthropic/xml_scraper_haiku.py index cd60f0d6..dd64f571 100644 --- a/examples/anthropic/xml_scraper_anthropic.py +++ b/examples/anthropic/xml_scraper_haiku.py @@ -26,7 +26,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", + "model": "claude-3-haiku-20240307", "max_tokens": 4000 }, } diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index f80e430d..555e8211 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -142,7 +142,7 @@ def handle_model(model_name, provider, token_key, default_token=8192): known_models = {"chatgpt","gpt","openai", "azure_openai", "google_genai", "ollama", "oneapi", "nvidia", "groq", "google_vertexai", "bedrock", "mistralai", "hugging_face", "deepseek", "ernie", - "fireworks", "anthropic"} + "fireworks", "claude-3-"} if llm_params["model"].split("/")[0] not in known_models and llm_params["model"].split("-")[0] not in known_models: raise ValueError(f"Model '{llm_params['model']}' is not supported") @@ -172,9 +172,8 @@ def handle_model(model_name, provider, token_key, default_token=8192): token_key = model_name if "model_tokens" not in llm_params else llm_params["model_tokens"] return handle_model(model_name, "ollama", token_key) - elif "anthropic" in llm_params["model"]: - model_name = llm_params["model"].split("anthropic/")[-1] - return handle_model(model_name, "anthropic", model_name) + elif "claude-3-" in llm_params["model"]: + return handle_model(llm_params["model"], "anthropic", "claude3") elif llm_params["model"].startswith("mistral"): model_name = llm_params["model"].split("/")[-1] From 35b994a8cd201bcde86038dcde6933946a4fe84f Mon Sep 17 00:00:00 2001 From: Jamie Beck Date: Mon, 26 Aug 2024 11:30:09 -0400 Subject: [PATCH 02/21] fix model_tokens not being used for ollama I am passing in the explicit model_tokens from user config as the default_token so it will correctly fallback to the users setting if the model is not found --- scrapegraphai/graphs/abstract_graph.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index ae1e90b2..e76a2ed7 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -174,8 +174,9 @@ def handle_model(model_name, provider, token_key, default_token=8192): elif "ollama" in llm_params["model"]: model_name = llm_params["model"].split("ollama/")[-1] - token_key = model_name if "model_tokens" not in llm_params else llm_params["model_tokens"] - return handle_model(model_name, "ollama", token_key) + token_key = model_name if "model_tokens" not in llm_params else None + explicit_model_tokens = 8192 if "model_tokens" not in llm_params else llm_params["model_tokens"] + return handle_model(model_name, "ollama", token_key, explicit_model_tokens) elif "claude-3-" in llm_params["model"]: return handle_model(llm_params["model"], "anthropic", "claude3") @@ -271,4 +272,4 @@ def _create_graph(self): def run(self) -> str: """ Abstract method to execute the graph and return the result. - """ \ No newline at end of file + """ From c1ce9c69d4ba746d488891d18fa64460e76124bf Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 26 Aug 2024 15:56:18 +0000 Subject: [PATCH 03/21] ci(release): 1.15.0-beta.4 [skip ci] ## [1.15.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.3...v1.15.0-beta.4) (2024-08-26) ### Bug Fixes * add claude3.5 sonnet ([ee8f8b3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ee8f8b31ecfe4ffd311528d2f48cb055e4609d99)) ### CI * **release:** 1.14.1 [skip ci] ([88e76ce](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/88e76ceedb39dc1b41222e9a5cb8a6f0d81cadf4)) --- CHANGELOG.md | 12 ++++++++++++ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe0f5900..280eb016 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## [1.15.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.3...v1.15.0-beta.4) (2024-08-26) + + +### Bug Fixes + +* add claude3.5 sonnet ([ee8f8b3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ee8f8b31ecfe4ffd311528d2f48cb055e4609d99)) + + +### CI + +* **release:** 1.14.1 [skip ci] ([88e76ce](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/88e76ceedb39dc1b41222e9a5cb8a6f0d81cadf4)) + ## [1.14.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.14.0...v1.14.1) (2024-08-24) diff --git a/pyproject.toml b/pyproject.toml index 80072b43..cdca86c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "scrapegraphai" -version = "1.14.1" +version = "1.15.0b4" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From 04128e7e9f585aaf774fabf646c4d9d3b96b8333 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 26 Aug 2024 18:00:17 +0200 Subject: [PATCH 04/21] fix: abstract graph local model --- scrapegraphai/graphs/abstract_graph.py | 4 ++-- scrapegraphai/helpers/models_tokens.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 71773ff6..2dcc54f9 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -170,8 +170,8 @@ def handle_model(model_name, provider, token_key, default_token=8192): elif "ollama" in llm_params["model"]: model_name = llm_params["model"].split("ollama/")[-1] token_key = model_name if "model_tokens" not in llm_params else None - explicit_model_tokens = 8192 if "model_tokens" not in llm_params else llm_params["model_tokens"] - return handle_model(model_name, "ollama", token_key, explicit_model_tokens) + model_tokens = 8192 if "model_tokens" not in llm_params else llm_params["model_tokens"] + return handle_model(model_name, "ollama", token_key, model_tokens) elif "claude-3-" in llm_params["model"]: return handle_model(llm_params["model"], "anthropic", "claude3") diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 1e3e1910..7677a901 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -74,6 +74,7 @@ "llama3.1:70b": 128000, "lama3.1:405b": 128000, "scrapegraph": 8192, + "mistral": 8192, "llava": 4096, "mixtral:8x22b-instruct": 65536, "mistral-openorca": 32000, From 22ab45f6bda3a12ab01c743fd124448a2e26cd46 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 26 Aug 2024 16:01:41 +0000 Subject: [PATCH 05/21] ci(release): 1.15.0-beta.5 [skip ci] ## [1.15.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.4...v1.15.0-beta.5) (2024-08-26) ### Bug Fixes * abstract graph local model ([04128e7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/04128e7e9f585aaf774fabf646c4d9d3b96b8333)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 280eb016..f82724e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.15.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.4...v1.15.0-beta.5) (2024-08-26) + + +### Bug Fixes + +* abstract graph local model ([04128e7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/04128e7e9f585aaf774fabf646c4d9d3b96b8333)) + ## [1.15.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.3...v1.15.0-beta.4) (2024-08-26) diff --git a/pyproject.toml b/pyproject.toml index cdca86c3..3616c032 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "scrapegraphai" -version = "1.15.0b4" +version = "1.15.0b5" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From f73343f19386b31878706963597c2565a023068d Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 27 Aug 2024 12:09:58 +0200 Subject: [PATCH 06/21] fix(AbstractGraph): correct and simplify instancing logic --- scrapegraphai/graphs/abstract_graph.py | 130 +++++++------------------ scrapegraphai/helpers/models_tokens.py | 4 +- 2 files changed, 39 insertions(+), 95 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 2dcc54f9..03fd30e2 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -125,103 +125,47 @@ def _create_llm(self, llm_config: dict) -> object: self.model_token = llm_params["model_tokens"] except KeyError as exc: raise KeyError("model_tokens not specified") from exc - return llm_params["model_instance"] - - def handle_model(model_name, provider, token_key, default_token=8192): - try: - self.model_token = models_tokens[provider][token_key] - except KeyError: - print(f"Model not found, using default token size ({default_token})") - self.model_token = default_token - llm_params["model_provider"] = provider - llm_params["model"] = model_name - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - return init_chat_model(**llm_params) - - known_models = {"chatgpt","gpt","openai", "azure_openai", "google_genai", - "ollama", "oneapi", "nvidia", "groq", "google_vertexai", - "bedrock", "mistralai", "hugging_face", "deepseek", "ernie", - "fireworks", "claude-3-"} - - if llm_params["model"].split("/")[0] not in known_models and llm_params["model"].split("-")[0] not in known_models: - raise ValueError(f"Model '{llm_params['model']}' is not supported") - + return llm_params["model_instance"] + + known_providers = {"openai", "azure_openai", "google_genai", "google_vertexai", + "ollama", "oneapi", "nvidia", "groq", "anthropic" "bedrock", "mistralai", + "hugging_face", "deepseek", "ernie", "fireworks"} + + split_model_provider = llm_params["model"].split("/") + llm_params["model_provider"] = split_model_provider[0] + llm_params["model"] = split_model_provider[1:] + + if llm_params["model_provider"] not in known_providers: + raise ValueError(f"Provider {llm_params['model_provider']} is not supported. If possible, try to use a model instance instead.") + try: - if "fireworks" in llm_params["model"]: - model_name = "/".join(llm_params["model"].split("/")[1:]) - token_key = llm_params["model"].split("/")[-1] - return handle_model(model_name, "fireworks", token_key) - - elif "gemini" in llm_params["model"]: - model_name = llm_params["model"].split("/")[-1] - return handle_model(model_name, "google_genai", model_name) - - elif llm_params["model"].startswith("claude"): - model_name = llm_params["model"].split("/")[-1] - return handle_model(model_name, "anthropic", model_name) - - elif llm_params["model"].startswith("vertexai"): - return handle_model(llm_params["model"], "google_vertexai", llm_params["model"]) - - elif "gpt-" in llm_params["model"]: - return handle_model(llm_params["model"], "openai", llm_params["model"]) - - elif "ollama" in llm_params["model"]: - model_name = llm_params["model"].split("ollama/")[-1] - token_key = model_name if "model_tokens" not in llm_params else None - model_tokens = 8192 if "model_tokens" not in llm_params else llm_params["model_tokens"] - return handle_model(model_name, "ollama", token_key, model_tokens) - - elif "claude-3-" in llm_params["model"]: - return handle_model(llm_params["model"], "anthropic", "claude3") - - elif llm_params["model"].startswith("mistral"): - model_name = llm_params["model"].split("/")[-1] - return handle_model(model_name, "mistralai", model_name) - - elif "deepseek" in llm_params["model"]: - try: - self.model_token = models_tokens["deepseek"][llm_params["model"]] - except KeyError: - print("model not found, using default token size (8192)") - self.model_token = 8192 - return DeepSeek(llm_params) - - elif "ernie" in llm_params["model"]: - from langchain_community.chat_models import ErnieBotChat - - try: - self.model_token = models_tokens["ernie"][llm_params["model"]] - except KeyError: - print("model not found, using default token size (8192)") - self.model_token = 8192 - return ErnieBotChat(llm_params) - - elif "oneapi" in llm_params["model"]: - llm_params["model"] = llm_params["model"].split("/")[-1] - try: - self.model_token = models_tokens["oneapi"][llm_params["model"]] - except KeyError: - raise KeyError("Model not supported") - return OneApi(llm_params) - - elif "nvidia" in llm_params["model"]: - from langchain_nvidia_ai_endpoints import ChatNVIDIA - - try: - self.model_token = models_tokens["nvidia"][llm_params["model"].split("/")[-1]] - llm_params["model"] = "/".join(llm_params["model"].split("/")[1:]) - except KeyError: - raise KeyError("Model not supported") - return ChatNVIDIA(llm_params) + self.model_token = models_tokens[llm_params["model"]][llm_params["model"]] + except KeyError: + print("Model not found, using default token size (8192)") + self.model_token = 8192 + try: + if llm_params["model_provider"] not in {"oneapi", "nvidia", "ernie", "deepseek"}: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return init_chat_model(**llm_params) else: - model_name = llm_params["model"].split("/")[-1] - return handle_model(model_name, llm_params["model"], model_name) + if "deepseek" in llm_params["model"]: + return DeepSeek(**llm_params) + + if "ernie" in llm_params["model"]: + from langchain_community.chat_models import ErnieBotChat + return ErnieBotChat(**llm_params) + + if "oneapi" in llm_params["model"]: + return OneApi(**llm_params) + + if "nvidia" in llm_params["model"]: + from langchain_nvidia_ai_endpoints import ChatNVIDIA + return ChatNVIDIA(**llm_params) - except KeyError as e: - print(f"Model not supported: {e}") + except Exception as e: + print(f"Error instancing model: {e}") def get_state(self, key=None) -> dict: diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 7677a901..d4c9f39e 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -102,7 +102,7 @@ "oneapi": { "qwen-turbo": 6000, }, - "nvdia": { + "nvidia": { "meta/llama3-70b-instruct": 419, "meta/llama3-8b-instruct": 419, "nemotron-4-340b-instruct": 1024, @@ -127,7 +127,7 @@ "gemma-7b-it": 8192, "claude-3-haiku-20240307'": 8192, }, - "claude": { + "anthropic": { "claude_instant": 100000, "claude2": 9000, "claude2.1": 200000, From f6df9b75125b4cacbef4af29faf3e17a13ff108c Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 27 Aug 2024 12:30:48 +0200 Subject: [PATCH 07/21] chore(examples): update model names --- .../csv_scraper_graph_multi_haiku.py | 4 ++-- examples/anthropic/csv_scraper_haiku.py | 5 ++-- examples/anthropic/custom_graph_haiku.py | 24 ++++--------------- examples/anthropic/json_scraper_haiku.py | 5 ++-- .../anthropic/json_scraper_multi_haiku.py | 5 ++-- examples/anthropic/pdf_scraper_graph_haiku.py | 5 ++-- examples/anthropic/pdf_scraper_multi_haiku.py | 5 ++-- examples/anthropic/scrape_plain_text_haiku.py | 5 ++-- examples/anthropic/script_generator_haiku.py | 5 ++-- .../anthropic/script_multi_generator_haiku.py | 7 +++--- examples/anthropic/search_graph_haiku.py | 5 ++-- .../anthropic/search_graph_schema_haiku.py | 4 ++-- examples/anthropic/search_link_graph_haiku.py | 20 ++++------------ examples/anthropic/smart_scraper_haiku.py | 5 ++-- .../anthropic/smart_scraper_multi_haiku.py | 5 ++-- .../anthropic/smart_scraper_schema_haiku.py | 4 ++-- .../xml_scraper_graph_multi_haiku.py | 4 ++-- examples/anthropic/xml_scraper_haiku.py | 5 ++-- examples/deepseek/csv_scraper_deepseek.py | 2 +- .../csv_scraper_graph_multi_deepseek.py | 2 +- examples/deepseek/json_scraper_deepseek.py | 2 +- .../deepseek/json_scraper_multi_deepseek.py | 2 +- .../deepseek/pdf_scraper_graph_deepseek.py | 2 +- .../deepseek/pdf_scraper_multi_deepseek.py | 2 +- .../deepseek/scrape_plain_text_deepseek.py | 2 +- .../deepseek/script_generator_deepseek.py | 2 +- .../script_multi_generator_deepseek.py | 2 +- examples/deepseek/search_graph_deepseek.py | 2 +- .../deepseek/search_graph_schema_deepseek.py | 2 +- .../deepseek/search_link_graph_deepseek.py | 2 +- examples/deepseek/smart_scraper_deepseek.py | 2 +- .../deepseek/smart_scraper_multi_deepseek.py | 2 +- .../deepseek/smart_scraper_schema_deepseek.py | 2 +- examples/deepseek/xml_scraper_deepseek.py | 2 +- .../xml_scraper_graph_multi_deepseek.py | 2 +- examples/ernie/csv_scraper_ernie.py | 14 ++++------- examples/ernie/custom_graph_ernie.py | 20 +++++++--------- examples/ernie/deep_scraper_ernie.py | 2 +- examples/ernie/json_scraper_ernie.py | 18 ++++++-------- examples/ernie/pdf_scraper_graph_ernie.py | 14 ++++------- examples/ernie/scrape_plain_text_ernie.py | 18 ++++++-------- examples/ernie/script_generator_ernie.py | 10 ++++---- .../ernie/script_multi_generator_ernie.py | 16 +++++-------- examples/ernie/search_graph_ernie.py | 16 +++++-------- examples/ernie/search_link_graph_ernie.py | 16 +++++-------- examples/ernie/smart_scraper_ernie.py | 13 +++++----- examples/ernie/smart_scraper_multi_ernie.py | 10 ++++---- examples/ernie/smart_scraper_schema_ernie.py | 8 ++++--- examples/ernie/speech_graph_ernie.py | 7 +++--- examples/ernie/xml_scraper_ernie.py | 6 +++-- .../csv_scraper_graph_multi_mistral.py | 2 +- examples/mistral/csv_scraper_mistral.py | 2 +- examples/mistral/custom_graph_mistral.py | 2 +- examples/mistral/deep_scraper_mistral.py | 2 +- examples/mistral/json_scraper_mistral.py | 2 +- .../mistral/json_scraper_multi_mistral.py | 2 +- examples/mistral/md_scraper_mistral.py | 2 +- examples/mistral/pdf_scraper_mistral.py | 2 +- examples/mistral/pdf_scraper_multi_mistral.py | 2 +- examples/mistral/scrape_plain_text_mistral.py | 2 +- examples/mistral/script_generator_mistral.py | 2 +- .../script_generator_schema_mistral.py | 2 +- .../mistral/script_multi_generator_mistral.py | 2 +- examples/mistral/search_graph_mistral.py | 2 +- .../mistral/search_graph_schema_mistral.py | 2 +- examples/mistral/search_link_graph_mistral.py | 2 +- examples/mistral/smart_scraper_mistral.py | 2 +- .../mistral/smart_scraper_multi_mistral.py | 2 +- .../mistral/smart_scraper_schema_mistral.py | 2 +- .../xml_scraper_graph_multi_mistral.py | 2 +- examples/mistral/xml_scraper_mistral.py | 2 +- .../openai/csv_scraper_graph_multi_openai.py | 2 +- examples/openai/csv_scraper_openai.py | 2 +- examples/openai/custom_graph_openai.py | 2 +- examples/openai/deep_scraper_openai.py | 2 +- examples/openai/json_scraper_multi_openai.py | 2 +- examples/openai/json_scraper_openai.py | 2 +- examples/openai/md_scraper_openai.py | 2 +- examples/openai/omni_scraper_openai.py | 2 +- examples/openai/omni_search_openai.py | 2 +- examples/openai/pdf_scraper_multi_openai.py | 2 +- examples/openai/pdf_scraper_openai.py | 2 +- examples/openai/scrape_plain_text_openai.py | 2 +- examples/openai/screenshot_scraper.py | 2 +- examples/openai/script_generator_openai.py | 2 +- .../openai/script_generator_schema_openai.py | 2 +- .../openai/script_multi_generator_openai.py | 2 +- examples/openai/search_graph_openai.py | 2 +- examples/openai/search_graph_schema_openai.py | 2 +- examples/openai/search_link_graph_openai.py | 2 +- examples/openai/smart_scraper_multi_openai.py | 2 +- examples/openai/smart_scraper_openai.py | 2 +- .../openai/smart_scraper_schema_openai.py | 2 +- examples/openai/speech_graph_openai.py | 2 +- .../openai/xml_scraper_graph_multi_openai.py | 2 +- examples/openai/xml_scraper_openai.py | 2 +- examples/single_node/kg_node.py | 2 +- examples/single_node/robot_node.py | 4 ++-- examples/single_node/search_internet_node.py | 4 ++-- scrapegraphai/helpers/models_tokens.py | 3 ++- 100 files changed, 192 insertions(+), 257 deletions(-) diff --git a/examples/anthropic/csv_scraper_graph_multi_haiku.py b/examples/anthropic/csv_scraper_graph_multi_haiku.py index b833af01..d574da5c 100644 --- a/examples/anthropic/csv_scraper_graph_multi_haiku.py +++ b/examples/anthropic/csv_scraper_graph_multi_haiku.py @@ -26,8 +26,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000}, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/csv_scraper_haiku.py b/examples/anthropic/csv_scraper_haiku.py index 2e0ebe81..745926a3 100644 --- a/examples/anthropic/csv_scraper_haiku.py +++ b/examples/anthropic/csv_scraper_haiku.py @@ -32,9 +32,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/custom_graph_haiku.py b/examples/anthropic/custom_graph_haiku.py index cea14361..d8b4dc19 100644 --- a/examples/anthropic/custom_graph_haiku.py +++ b/examples/anthropic/custom_graph_haiku.py @@ -5,10 +5,9 @@ import os from dotenv import load_dotenv -from langchain_openai import OpenAIEmbeddings -from langchain_openai import ChatOpenAI +from langchain_anthropic import ChatAnthropic from scrapegraphai.graphs import BaseGraph -from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode +from scrapegraphai.nodes import FetchNode, ParseNode, GenerateAnswerNode, RobotsNode load_dotenv() # ************************************************ @@ -19,16 +18,14 @@ "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + }, } # ************************************************ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) -embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) +llm_model = ChatAnthropic(graph_config["llm"]) # define the nodes for the graph robot_node = RobotsNode( @@ -57,15 +54,6 @@ "verbose": True, } ) -rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": llm_model, - "embedder_model": embedder, - "verbose": True, - } -) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], @@ -84,14 +72,12 @@ robot_node, fetch_node, parse_node, - rag_node, generate_answer_node, ], edges=[ (robot_node, fetch_node), (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node) + (parse_node, generate_answer_node) ], entry_point=robot_node ) diff --git a/examples/anthropic/json_scraper_haiku.py b/examples/anthropic/json_scraper_haiku.py index 2610b658..9d5fc8db 100644 --- a/examples/anthropic/json_scraper_haiku.py +++ b/examples/anthropic/json_scraper_haiku.py @@ -26,9 +26,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/json_scraper_multi_haiku.py b/examples/anthropic/json_scraper_multi_haiku.py index 0327673b..d016439d 100644 --- a/examples/anthropic/json_scraper_multi_haiku.py +++ b/examples/anthropic/json_scraper_multi_haiku.py @@ -11,9 +11,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } FILE_NAME = "inputs/example.json" diff --git a/examples/anthropic/pdf_scraper_graph_haiku.py b/examples/anthropic/pdf_scraper_graph_haiku.py index 61be06b4..ee221ac6 100644 --- a/examples/anthropic/pdf_scraper_graph_haiku.py +++ b/examples/anthropic/pdf_scraper_graph_haiku.py @@ -14,9 +14,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } source = """ diff --git a/examples/anthropic/pdf_scraper_multi_haiku.py b/examples/anthropic/pdf_scraper_multi_haiku.py index 974dd2f8..2d117c35 100644 --- a/examples/anthropic/pdf_scraper_multi_haiku.py +++ b/examples/anthropic/pdf_scraper_multi_haiku.py @@ -11,9 +11,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # *************** diff --git a/examples/anthropic/scrape_plain_text_haiku.py b/examples/anthropic/scrape_plain_text_haiku.py index d3f36638..d3099026 100644 --- a/examples/anthropic/scrape_plain_text_haiku.py +++ b/examples/anthropic/scrape_plain_text_haiku.py @@ -28,9 +28,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/script_generator_haiku.py b/examples/anthropic/script_generator_haiku.py index 889ce0b5..bdd0c23b 100644 --- a/examples/anthropic/script_generator_haiku.py +++ b/examples/anthropic/script_generator_haiku.py @@ -16,9 +16,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/script_multi_generator_haiku.py b/examples/anthropic/script_multi_generator_haiku.py index f7c69010..bacf0bfc 100644 --- a/examples/anthropic/script_multi_generator_haiku.py +++ b/examples/anthropic/script_multi_generator_haiku.py @@ -16,10 +16,9 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, - "library": "beautifulsoup" + "model": "anthropic/claude-3-haiku-20240307", + }, + "library": "beautifulsoup" } # ************************************************ diff --git a/examples/anthropic/search_graph_haiku.py b/examples/anthropic/search_graph_haiku.py index f90d7598..97a5213d 100644 --- a/examples/anthropic/search_graph_haiku.py +++ b/examples/anthropic/search_graph_haiku.py @@ -15,9 +15,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/search_graph_schema_haiku.py b/examples/anthropic/search_graph_schema_haiku.py index c9e7a875..1158d58a 100644 --- a/examples/anthropic/search_graph_schema_haiku.py +++ b/examples/anthropic/search_graph_schema_haiku.py @@ -27,8 +27,8 @@ class Dishes(BaseModel): graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000}, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/search_link_graph_haiku.py b/examples/anthropic/search_link_graph_haiku.py index ccfbc1d2..70798cf3 100644 --- a/examples/anthropic/search_link_graph_haiku.py +++ b/examples/anthropic/search_link_graph_haiku.py @@ -14,23 +14,11 @@ load_dotenv() -llm_model_instance = AzureChatOpenAI( - openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], - azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] -) - -embedder_model_instance = AzureOpenAIEmbeddings( - azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"], - openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], -) - -# ************************************************ -# Create the SmartScraperGraph instance and run it -# ************************************************ - graph_config = { - "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} + "llm": { + "api_key": os.getenv("ANTHROPIC_API_KEY"), + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/smart_scraper_haiku.py b/examples/anthropic/smart_scraper_haiku.py index f0bb2a57..51ca1bf5 100644 --- a/examples/anthropic/smart_scraper_haiku.py +++ b/examples/anthropic/smart_scraper_haiku.py @@ -19,9 +19,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } smart_scraper_graph = SmartScraperGraph( diff --git a/examples/anthropic/smart_scraper_multi_haiku.py b/examples/anthropic/smart_scraper_multi_haiku.py index eb2001d4..f96de0ab 100644 --- a/examples/anthropic/smart_scraper_multi_haiku.py +++ b/examples/anthropic/smart_scraper_multi_haiku.py @@ -17,9 +17,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ******************************************************* diff --git a/examples/anthropic/smart_scraper_schema_haiku.py b/examples/anthropic/smart_scraper_schema_haiku.py index 83cedd2a..bd447a06 100644 --- a/examples/anthropic/smart_scraper_schema_haiku.py +++ b/examples/anthropic/smart_scraper_schema_haiku.py @@ -33,8 +33,8 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000}, + "model": "anthropic/claude-3-haiku-20240307", + }, } smart_scraper_graph = SmartScraperGraph( diff --git a/examples/anthropic/xml_scraper_graph_multi_haiku.py b/examples/anthropic/xml_scraper_graph_multi_haiku.py index 6b79f709..6e9bc5f8 100644 --- a/examples/anthropic/xml_scraper_graph_multi_haiku.py +++ b/examples/anthropic/xml_scraper_graph_multi_haiku.py @@ -26,8 +26,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000}, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/xml_scraper_haiku.py b/examples/anthropic/xml_scraper_haiku.py index dd64f571..2dc4b8d2 100644 --- a/examples/anthropic/xml_scraper_haiku.py +++ b/examples/anthropic/xml_scraper_haiku.py @@ -26,9 +26,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/deepseek/csv_scraper_deepseek.py b/examples/deepseek/csv_scraper_deepseek.py index b734b543..60b1c394 100644 --- a/examples/deepseek/csv_scraper_deepseek.py +++ b/examples/deepseek/csv_scraper_deepseek.py @@ -27,7 +27,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/csv_scraper_graph_multi_deepseek.py b/examples/deepseek/csv_scraper_graph_multi_deepseek.py index ea5e9154..0a08f83f 100644 --- a/examples/deepseek/csv_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/csv_scraper_graph_multi_deepseek.py @@ -27,7 +27,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/json_scraper_deepseek.py b/examples/deepseek/json_scraper_deepseek.py index dfe6f489..02991c0d 100644 --- a/examples/deepseek/json_scraper_deepseek.py +++ b/examples/deepseek/json_scraper_deepseek.py @@ -26,7 +26,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/json_scraper_multi_deepseek.py b/examples/deepseek/json_scraper_multi_deepseek.py index b957dde0..4f9ca32d 100644 --- a/examples/deepseek/json_scraper_multi_deepseek.py +++ b/examples/deepseek/json_scraper_multi_deepseek.py @@ -12,7 +12,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/pdf_scraper_graph_deepseek.py b/examples/deepseek/pdf_scraper_graph_deepseek.py index d66bbef5..c9c5e0b2 100644 --- a/examples/deepseek/pdf_scraper_graph_deepseek.py +++ b/examples/deepseek/pdf_scraper_graph_deepseek.py @@ -17,7 +17,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/pdf_scraper_multi_deepseek.py b/examples/deepseek/pdf_scraper_multi_deepseek.py index 211e4635..e43dd10a 100644 --- a/examples/deepseek/pdf_scraper_multi_deepseek.py +++ b/examples/deepseek/pdf_scraper_multi_deepseek.py @@ -12,7 +12,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/scrape_plain_text_deepseek.py b/examples/deepseek/scrape_plain_text_deepseek.py index d7a070d7..a7834a8f 100644 --- a/examples/deepseek/scrape_plain_text_deepseek.py +++ b/examples/deepseek/scrape_plain_text_deepseek.py @@ -28,7 +28,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/script_generator_deepseek.py b/examples/deepseek/script_generator_deepseek.py index fd5fd4dd..3de06f25 100644 --- a/examples/deepseek/script_generator_deepseek.py +++ b/examples/deepseek/script_generator_deepseek.py @@ -17,7 +17,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/script_multi_generator_deepseek.py b/examples/deepseek/script_multi_generator_deepseek.py index 2ebfd90a..cc577ecd 100644 --- a/examples/deepseek/script_multi_generator_deepseek.py +++ b/examples/deepseek/script_multi_generator_deepseek.py @@ -17,7 +17,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/search_graph_deepseek.py b/examples/deepseek/search_graph_deepseek.py index 176d6107..54d2e9fa 100644 --- a/examples/deepseek/search_graph_deepseek.py +++ b/examples/deepseek/search_graph_deepseek.py @@ -15,7 +15,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/search_graph_schema_deepseek.py b/examples/deepseek/search_graph_schema_deepseek.py index f5db278e..bcebe76d 100644 --- a/examples/deepseek/search_graph_schema_deepseek.py +++ b/examples/deepseek/search_graph_schema_deepseek.py @@ -31,7 +31,7 @@ class Dishes(BaseModel): graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/search_link_graph_deepseek.py b/examples/deepseek/search_link_graph_deepseek.py index 6a35f177..96f886a9 100644 --- a/examples/deepseek/search_link_graph_deepseek.py +++ b/examples/deepseek/search_link_graph_deepseek.py @@ -16,7 +16,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/smart_scraper_deepseek.py b/examples/deepseek/smart_scraper_deepseek.py index ed291b02..50314819 100644 --- a/examples/deepseek/smart_scraper_deepseek.py +++ b/examples/deepseek/smart_scraper_deepseek.py @@ -18,7 +18,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/smart_scraper_multi_deepseek.py b/examples/deepseek/smart_scraper_multi_deepseek.py index fafe7261..374cc6e2 100644 --- a/examples/deepseek/smart_scraper_multi_deepseek.py +++ b/examples/deepseek/smart_scraper_multi_deepseek.py @@ -16,7 +16,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/smart_scraper_schema_deepseek.py b/examples/deepseek/smart_scraper_schema_deepseek.py index 5cbbb702..6d164eb1 100644 --- a/examples/deepseek/smart_scraper_schema_deepseek.py +++ b/examples/deepseek/smart_scraper_schema_deepseek.py @@ -30,7 +30,7 @@ class Projects(BaseModel): graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/xml_scraper_deepseek.py b/examples/deepseek/xml_scraper_deepseek.py index ba401b91..d69665f4 100644 --- a/examples/deepseek/xml_scraper_deepseek.py +++ b/examples/deepseek/xml_scraper_deepseek.py @@ -28,7 +28,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/xml_scraper_graph_multi_deepseek.py b/examples/deepseek/xml_scraper_graph_multi_deepseek.py index 0f53a6b2..5098c9fd 100644 --- a/examples/deepseek/xml_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/xml_scraper_graph_multi_deepseek.py @@ -27,7 +27,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/ernie/csv_scraper_ernie.py b/examples/ernie/csv_scraper_ernie.py index 1594d17c..410e300e 100644 --- a/examples/ernie/csv_scraper_ernie.py +++ b/examples/ernie/csv_scraper_ernie.py @@ -25,16 +25,12 @@ graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434",} + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 } +} # ************************************************ # Create the CSVScraperGraph instance and run it diff --git a/examples/ernie/custom_graph_ernie.py b/examples/ernie/custom_graph_ernie.py index f750276a..5dad8bac 100644 --- a/examples/ernie/custom_graph_ernie.py +++ b/examples/ernie/custom_graph_ernie.py @@ -14,24 +14,20 @@ # Define the configuration for the graph # ************************************************ -graph_config = { - "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434",} +graph_config = { + "llm": { + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + } } # ************************************************ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) +llm_model = ChatOpenAI(graph_config["llm"]) embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) # define the nodes for the graph diff --git a/examples/ernie/deep_scraper_ernie.py b/examples/ernie/deep_scraper_ernie.py index 059f7a74..b8c6501a 100644 --- a/examples/ernie/deep_scraper_ernie.py +++ b/examples/ernie/deep_scraper_ernie.py @@ -18,7 +18,7 @@ graph_config = { "llm": { - "model": "ernie-bot-turbo", + "model": "ernie/ernie-bot-turbo", "ernie_client_id": "", "ernie_client_secret": "", "temperature": 0.1 diff --git a/examples/ernie/json_scraper_ernie.py b/examples/ernie/json_scraper_ernie.py index ddd67050..e73ebc10 100644 --- a/examples/ernie/json_scraper_ernie.py +++ b/examples/ernie/json_scraper_ernie.py @@ -21,17 +21,13 @@ # Define the configuration for the graph # ************************************************ -graph_config = { - "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434"} +graph_config = { + "llm": { + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + } } # ************************************************ diff --git a/examples/ernie/pdf_scraper_graph_ernie.py b/examples/ernie/pdf_scraper_graph_ernie.py index 3de975a0..6016da7a 100644 --- a/examples/ernie/pdf_scraper_graph_ernie.py +++ b/examples/ernie/pdf_scraper_graph_ernie.py @@ -7,16 +7,12 @@ graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434",} + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 } +} source = """ The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian diff --git a/examples/ernie/scrape_plain_text_ernie.py b/examples/ernie/scrape_plain_text_ernie.py index 27b4f08b..c6bb715a 100644 --- a/examples/ernie/scrape_plain_text_ernie.py +++ b/examples/ernie/scrape_plain_text_ernie.py @@ -25,17 +25,13 @@ # Define the configuration for the graph # ************************************************ -graph_config = { - "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434",} +graph_config = { + "llm": { + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + } } # ************************************************ diff --git a/examples/ernie/script_generator_ernie.py b/examples/ernie/script_generator_ernie.py index 14c00ab4..42e136ff 100644 --- a/examples/ernie/script_generator_ernie.py +++ b/examples/ernie/script_generator_ernie.py @@ -13,12 +13,12 @@ # Define the configuration for the graph # ************************************************ -openai_key = os.getenv("OPENAI_APIKEY") - -graph_config = { +graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "library": "beautifulsoup" } diff --git a/examples/ernie/script_multi_generator_ernie.py b/examples/ernie/script_multi_generator_ernie.py index 73e9f5ab..285d491a 100644 --- a/examples/ernie/script_multi_generator_ernie.py +++ b/examples/ernie/script_multi_generator_ernie.py @@ -9,17 +9,13 @@ # Define the configuration for the graph # ************************************************ -graph_config = { +graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434"}, + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + }, "library": "beautifulsoup" } diff --git a/examples/ernie/search_graph_ernie.py b/examples/ernie/search_graph_ernie.py index c04d9f9b..0e811683 100644 --- a/examples/ernie/search_graph_ernie.py +++ b/examples/ernie/search_graph_ernie.py @@ -12,17 +12,13 @@ # Define the configuration for the graph # ************************************************ -graph_config = { +graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434"}, + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + }, "library": "beautifulsoup" } diff --git a/examples/ernie/search_link_graph_ernie.py b/examples/ernie/search_link_graph_ernie.py index 466b230c..f38b2772 100644 --- a/examples/ernie/search_link_graph_ernie.py +++ b/examples/ernie/search_link_graph_ernie.py @@ -8,17 +8,13 @@ # Define the configuration for the graph # ************************************************ -graph_config = { +graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434"}, + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + }, "library": "beautifulsoup" } diff --git a/examples/ernie/smart_scraper_ernie.py b/examples/ernie/smart_scraper_ernie.py index dcee0972..56084dad 100644 --- a/examples/ernie/smart_scraper_ernie.py +++ b/examples/ernie/smart_scraper_ernie.py @@ -14,15 +14,14 @@ # Define the configuration for the graph # ************************************************ -openai_key = os.getenv("OPENAI_APIKEY") - -graph_config = { +graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, - "verbose": False, - "headless": False, + "library": "beautifulsoup" } # ************************************************ diff --git a/examples/ernie/smart_scraper_multi_ernie.py b/examples/ernie/smart_scraper_multi_ernie.py index ddfc6239..6b62b685 100644 --- a/examples/ernie/smart_scraper_multi_ernie.py +++ b/examples/ernie/smart_scraper_multi_ernie.py @@ -12,12 +12,12 @@ # Define the configuration for the graph # ************************************************ -openai_key = os.getenv("OPENAI_APIKEY") - -graph_config = { +graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-4o", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "verbose": True, "headless": False, diff --git a/examples/ernie/smart_scraper_schema_ernie.py b/examples/ernie/smart_scraper_schema_ernie.py index 64a74937..b0fe3d7e 100644 --- a/examples/ernie/smart_scraper_schema_ernie.py +++ b/examples/ernie/smart_scraper_schema_ernie.py @@ -34,10 +34,12 @@ class Projects(BaseModel): openai_key = os.getenv("OPENAI_APIKEY") -graph_config = { +graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "verbose": True, "headless": False, diff --git a/examples/ernie/speech_graph_ernie.py b/examples/ernie/speech_graph_ernie.py index 15cc2cfb..cece3149 100644 --- a/examples/ernie/speech_graph_ernie.py +++ b/examples/ernie/speech_graph_ernie.py @@ -24,9 +24,10 @@ graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", - "temperature": 0.7, + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "tts_model": { "api_key": openai_key, diff --git a/examples/ernie/xml_scraper_ernie.py b/examples/ernie/xml_scraper_ernie.py index 5be5716e..a5bf03e0 100644 --- a/examples/ernie/xml_scraper_ernie.py +++ b/examples/ernie/xml_scraper_ernie.py @@ -27,8 +27,10 @@ graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "verbose":False, } diff --git a/examples/mistral/csv_scraper_graph_multi_mistral.py b/examples/mistral/csv_scraper_graph_multi_mistral.py index c3a25e2a..615e59e4 100644 --- a/examples/mistral/csv_scraper_graph_multi_mistral.py +++ b/examples/mistral/csv_scraper_graph_multi_mistral.py @@ -27,7 +27,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/csv_scraper_mistral.py b/examples/mistral/csv_scraper_mistral.py index 63ecfbca..195fb16a 100644 --- a/examples/mistral/csv_scraper_mistral.py +++ b/examples/mistral/csv_scraper_mistral.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/custom_graph_mistral.py b/examples/mistral/custom_graph_mistral.py index c839f7b6..f02ead0c 100644 --- a/examples/mistral/custom_graph_mistral.py +++ b/examples/mistral/custom_graph_mistral.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/deep_scraper_mistral.py b/examples/mistral/deep_scraper_mistral.py index 5cf576e7..bf0f6ba4 100644 --- a/examples/mistral/deep_scraper_mistral.py +++ b/examples/mistral/deep_scraper_mistral.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "max_depth": 1 diff --git a/examples/mistral/json_scraper_mistral.py b/examples/mistral/json_scraper_mistral.py index 2a29c5a7..12f55127 100644 --- a/examples/mistral/json_scraper_mistral.py +++ b/examples/mistral/json_scraper_mistral.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/json_scraper_multi_mistral.py b/examples/mistral/json_scraper_multi_mistral.py index 07e65c95..1369eda7 100644 --- a/examples/mistral/json_scraper_multi_mistral.py +++ b/examples/mistral/json_scraper_multi_mistral.py @@ -13,7 +13,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", } } diff --git a/examples/mistral/md_scraper_mistral.py b/examples/mistral/md_scraper_mistral.py index 45995cb7..c4e3f2c7 100644 --- a/examples/mistral/md_scraper_mistral.py +++ b/examples/mistral/md_scraper_mistral.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/pdf_scraper_mistral.py b/examples/mistral/pdf_scraper_mistral.py index 9636f7f7..b006fdb8 100644 --- a/examples/mistral/pdf_scraper_mistral.py +++ b/examples/mistral/pdf_scraper_mistral.py @@ -14,7 +14,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, } diff --git a/examples/mistral/pdf_scraper_multi_mistral.py b/examples/mistral/pdf_scraper_multi_mistral.py index 97ad3222..e9f1613f 100644 --- a/examples/mistral/pdf_scraper_multi_mistral.py +++ b/examples/mistral/pdf_scraper_multi_mistral.py @@ -19,7 +19,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, } diff --git a/examples/mistral/scrape_plain_text_mistral.py b/examples/mistral/scrape_plain_text_mistral.py index 3bf199ad..f2b38172 100644 --- a/examples/mistral/scrape_plain_text_mistral.py +++ b/examples/mistral/scrape_plain_text_mistral.py @@ -30,7 +30,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/script_generator_mistral.py b/examples/mistral/script_generator_mistral.py index 464a522c..4fe45773 100644 --- a/examples/mistral/script_generator_mistral.py +++ b/examples/mistral/script_generator_mistral.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "library": "beautifulsoup" } diff --git a/examples/mistral/script_generator_schema_mistral.py b/examples/mistral/script_generator_schema_mistral.py index 8172f9a1..b9c77285 100644 --- a/examples/mistral/script_generator_schema_mistral.py +++ b/examples/mistral/script_generator_schema_mistral.py @@ -32,7 +32,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "library": "beautifulsoup", "verbose": True, diff --git a/examples/mistral/script_multi_generator_mistral.py b/examples/mistral/script_multi_generator_mistral.py index 4efa6914..f4d5d5b5 100644 --- a/examples/mistral/script_multi_generator_mistral.py +++ b/examples/mistral/script_multi_generator_mistral.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "library": "beautifulsoup", "verbose": True, diff --git a/examples/mistral/search_graph_mistral.py b/examples/mistral/search_graph_mistral.py index 68a480d3..f8573f5e 100644 --- a/examples/mistral/search_graph_mistral.py +++ b/examples/mistral/search_graph_mistral.py @@ -16,7 +16,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "max_results": 2, "verbose": True, diff --git a/examples/mistral/search_graph_schema_mistral.py b/examples/mistral/search_graph_schema_mistral.py index d4588289..7c71c0b1 100644 --- a/examples/mistral/search_graph_schema_mistral.py +++ b/examples/mistral/search_graph_schema_mistral.py @@ -31,7 +31,7 @@ class Dishes(BaseModel): graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "max_results": 2, "verbose": True, diff --git a/examples/mistral/search_link_graph_mistral.py b/examples/mistral/search_link_graph_mistral.py index 7191b27e..3216ff2c 100644 --- a/examples/mistral/search_link_graph_mistral.py +++ b/examples/mistral/search_link_graph_mistral.py @@ -17,7 +17,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/smart_scraper_mistral.py b/examples/mistral/smart_scraper_mistral.py index 80d09e6d..7291a40a 100644 --- a/examples/mistral/smart_scraper_mistral.py +++ b/examples/mistral/smart_scraper_mistral.py @@ -16,7 +16,7 @@ graph_config = { "llm": { "api_key": os.getenv("MISTRAL_API_KEY"), - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/smart_scraper_multi_mistral.py b/examples/mistral/smart_scraper_multi_mistral.py index c86bb787..2654fbcb 100644 --- a/examples/mistral/smart_scraper_multi_mistral.py +++ b/examples/mistral/smart_scraper_multi_mistral.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/smart_scraper_schema_mistral.py b/examples/mistral/smart_scraper_schema_mistral.py index 6d6b9ad3..3e1e505a 100644 --- a/examples/mistral/smart_scraper_schema_mistral.py +++ b/examples/mistral/smart_scraper_schema_mistral.py @@ -30,7 +30,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key":mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/xml_scraper_graph_multi_mistral.py b/examples/mistral/xml_scraper_graph_multi_mistral.py index b9d46b0e..0ea9d30c 100644 --- a/examples/mistral/xml_scraper_graph_multi_mistral.py +++ b/examples/mistral/xml_scraper_graph_multi_mistral.py @@ -29,7 +29,7 @@ graph_config = { "llm": { "api_key":mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/xml_scraper_mistral.py b/examples/mistral/xml_scraper_mistral.py index c2675c6d..eb6036bf 100644 --- a/examples/mistral/xml_scraper_mistral.py +++ b/examples/mistral/xml_scraper_mistral.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose":False, } diff --git a/examples/openai/csv_scraper_graph_multi_openai.py b/examples/openai/csv_scraper_graph_multi_openai.py index 7b91c896..5e876dcb 100644 --- a/examples/openai/csv_scraper_graph_multi_openai.py +++ b/examples/openai/csv_scraper_graph_multi_openai.py @@ -27,7 +27,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git a/examples/openai/csv_scraper_openai.py b/examples/openai/csv_scraper_openai.py index 744fc7a4..f4410fcd 100644 --- a/examples/openai/csv_scraper_openai.py +++ b/examples/openai/csv_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git a/examples/openai/custom_graph_openai.py b/examples/openai/custom_graph_openai.py index cc7e715d..b1471a21 100644 --- a/examples/openai/custom_graph_openai.py +++ b/examples/openai/custom_graph_openai.py @@ -27,7 +27,7 @@ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) +llm_model = ChatOpenAI(graph_config["llm"]) embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) # define the nodes for the graph diff --git a/examples/openai/deep_scraper_openai.py b/examples/openai/deep_scraper_openai.py index 5b7202d4..b20e164d 100644 --- a/examples/openai/deep_scraper_openai.py +++ b/examples/openai/deep_scraper_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "max_depth": 1 diff --git a/examples/openai/json_scraper_multi_openai.py b/examples/openai/json_scraper_multi_openai.py index b27e5050..f7cb528a 100644 --- a/examples/openai/json_scraper_multi_openai.py +++ b/examples/openai/json_scraper_multi_openai.py @@ -13,7 +13,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", } } diff --git a/examples/openai/json_scraper_openai.py b/examples/openai/json_scraper_openai.py index eb5d1e7e..e20a5870 100644 --- a/examples/openai/json_scraper_openai.py +++ b/examples/openai/json_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git a/examples/openai/md_scraper_openai.py b/examples/openai/md_scraper_openai.py index 2c264ab9..3456c89a 100644 --- a/examples/openai/md_scraper_openai.py +++ b/examples/openai/md_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git a/examples/openai/omni_scraper_openai.py b/examples/openai/omni_scraper_openai.py index 1d1d86ba..3e6e62ee 100644 --- a/examples/openai/omni_scraper_openai.py +++ b/examples/openai/omni_scraper_openai.py @@ -19,7 +19,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": True, diff --git a/examples/openai/omni_search_openai.py b/examples/openai/omni_search_openai.py index ed0f8f3c..fb967def 100644 --- a/examples/openai/omni_search_openai.py +++ b/examples/openai/omni_search_openai.py @@ -17,7 +17,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "max_results": 2, "max_images": 1, diff --git a/examples/openai/pdf_scraper_multi_openai.py b/examples/openai/pdf_scraper_multi_openai.py index 49a9c7fa..91e219e3 100644 --- a/examples/openai/pdf_scraper_multi_openai.py +++ b/examples/openai/pdf_scraper_multi_openai.py @@ -19,7 +19,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, } diff --git a/examples/openai/pdf_scraper_openai.py b/examples/openai/pdf_scraper_openai.py index 2b0e19f3..e076defe 100644 --- a/examples/openai/pdf_scraper_openai.py +++ b/examples/openai/pdf_scraper_openai.py @@ -14,7 +14,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, } diff --git a/examples/openai/scrape_plain_text_openai.py b/examples/openai/scrape_plain_text_openai.py index 7f390cff..eb8c76e5 100644 --- a/examples/openai/scrape_plain_text_openai.py +++ b/examples/openai/scrape_plain_text_openai.py @@ -30,7 +30,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git a/examples/openai/screenshot_scraper.py b/examples/openai/screenshot_scraper.py index 826dcc50..c72c44d1 100644 --- a/examples/openai/screenshot_scraper.py +++ b/examples/openai/screenshot_scraper.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": os.getenv("OPENAI_API_KEY"), - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/script_generator_openai.py b/examples/openai/script_generator_openai.py index 046a25ec..e67ad52b 100644 --- a/examples/openai/script_generator_openai.py +++ b/examples/openai/script_generator_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "library": "beautifulsoup" } diff --git a/examples/openai/script_generator_schema_openai.py b/examples/openai/script_generator_schema_openai.py index a728c8a1..5e542c53 100644 --- a/examples/openai/script_generator_schema_openai.py +++ b/examples/openai/script_generator_schema_openai.py @@ -32,7 +32,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "openai/gpt-3.5-turbo", }, "library": "beautifulsoup", "verbose": True, diff --git a/examples/openai/script_multi_generator_openai.py b/examples/openai/script_multi_generator_openai.py index d46d2294..3fdd029f 100644 --- a/examples/openai/script_multi_generator_openai.py +++ b/examples/openai/script_multi_generator_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "library": "beautifulsoup", "verbose": True, diff --git a/examples/openai/search_graph_openai.py b/examples/openai/search_graph_openai.py index c12caa4f..8d869c19 100644 --- a/examples/openai/search_graph_openai.py +++ b/examples/openai/search_graph_openai.py @@ -16,7 +16,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "max_results": 2, "verbose": True, diff --git a/examples/openai/search_graph_schema_openai.py b/examples/openai/search_graph_schema_openai.py index ecbcc644..571f08b0 100644 --- a/examples/openai/search_graph_schema_openai.py +++ b/examples/openai/search_graph_schema_openai.py @@ -31,7 +31,7 @@ class Dishes(BaseModel): graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "openai/gpt-3.5-turbo", }, "max_results": 2, "verbose": True, diff --git a/examples/openai/search_link_graph_openai.py b/examples/openai/search_link_graph_openai.py index 818f9434..a988731b 100644 --- a/examples/openai/search_link_graph_openai.py +++ b/examples/openai/search_link_graph_openai.py @@ -17,7 +17,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/smart_scraper_multi_openai.py b/examples/openai/smart_scraper_multi_openai.py index 504e00a8..8f5e648b 100644 --- a/examples/openai/smart_scraper_multi_openai.py +++ b/examples/openai/smart_scraper_multi_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/smart_scraper_openai.py b/examples/openai/smart_scraper_openai.py index 119f67e5..2962f51b 100644 --- a/examples/openai/smart_scraper_openai.py +++ b/examples/openai/smart_scraper_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": os.getenv("OPENAI_API_KEY"), - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/smart_scraper_schema_openai.py b/examples/openai/smart_scraper_schema_openai.py index 828a9b0a..0c1618d6 100644 --- a/examples/openai/smart_scraper_schema_openai.py +++ b/examples/openai/smart_scraper_schema_openai.py @@ -30,7 +30,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key":openai_key, - "model": "gpt-4o-mini", + "model": "openai/gpt-4o-mini", }, "verbose": True, "headless": False, diff --git a/examples/openai/speech_graph_openai.py b/examples/openai/speech_graph_openai.py index 603ce51c..7c368df7 100644 --- a/examples/openai/speech_graph_openai.py +++ b/examples/openai/speech_graph_openai.py @@ -25,7 +25,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", "temperature": 0.7, }, "tts_model": { diff --git a/examples/openai/xml_scraper_graph_multi_openai.py b/examples/openai/xml_scraper_graph_multi_openai.py index ef46b877..6610a49f 100644 --- a/examples/openai/xml_scraper_graph_multi_openai.py +++ b/examples/openai/xml_scraper_graph_multi_openai.py @@ -29,7 +29,7 @@ graph_config = { "llm": { "api_key":openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/xml_scraper_openai.py b/examples/openai/xml_scraper_openai.py index b2b5075e..04b3ec9d 100644 --- a/examples/openai/xml_scraper_openai.py +++ b/examples/openai/xml_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose":False, } diff --git a/examples/single_node/kg_node.py b/examples/single_node/kg_node.py index dd5a6d04..37d1d9a4 100644 --- a/examples/single_node/kg_node.py +++ b/examples/single_node/kg_node.py @@ -57,7 +57,7 @@ # Define the node # ************************************************ -llm_model = OpenAI(graph_config["llm"]) +llm_model = ChatOpenAI(graph_config["llm"]) robots_node = KnowledgeGraphNode( input="user_prompt & answer_dict", diff --git a/examples/single_node/robot_node.py b/examples/single_node/robot_node.py index c2bcbbd1..dcb70e3d 100644 --- a/examples/single_node/robot_node.py +++ b/examples/single_node/robot_node.py @@ -11,12 +11,12 @@ graph_config = { "llm": { - "model": "ollama/llama3", + "model": "llama3", "temperature": 0, "streaming": True }, "embeddings": { - "model": "ollama/nomic-embed-text", + "model": "nomic-embed-text", "temperature": 0, # "base_url": "http://localhost:11434", # set ollama URL arbitrarily } diff --git a/examples/single_node/search_internet_node.py b/examples/single_node/search_internet_node.py index 8a8149fa..c998cdd1 100644 --- a/examples/single_node/search_internet_node.py +++ b/examples/single_node/search_internet_node.py @@ -2,7 +2,7 @@ Example of custom graph using existing nodes """ -from scrapegraphai.models import Ollama +from langchain_community.chat_models import ChatOllama from scrapegraphai.nodes import SearchInternetNode # ************************************************ @@ -24,7 +24,7 @@ # Define the node # ************************************************ -llm_model = Ollama(graph_config["llm"]) +llm_model = ChatOllama(graph_config["llm"]) search_node = SearchInternetNode( input="user_input", diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index d4c9f39e..556d6ad6 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -51,7 +51,7 @@ "gemini-1.5-pro-latest": 128000, "models/embedding-001": 2048 }, - "google_vertexai": { + "google_vertexai": { "gemini-1.5-flash": 128000, "gemini-1.5-pro": 128000, "gemini-1.0-pro": 128000, @@ -137,6 +137,7 @@ "claude-3-sonnet-20240229": 200000, "claude-3-haiku-20240307": 200000, "claude-3-5-sonnet-20240620": 200000, + "claude-3-haiku-20240307": 4000, }, "bedrock": { "anthropic.claude-3-5-sonnet-20240620": 200000, From 229d74d4bd39befa3723fa2841e23d40007a9772 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:46:42 +0200 Subject: [PATCH 08/21] test(AbstractGraph): add AbstractGraph tests --- requirements-dev.lock | 149 --------------------------- requirements.lock | 154 ---------------------------- requirements.txt | 14 +-- tests/graphs/abstract_graph_test.py | 31 ++++++ 4 files changed, 34 insertions(+), 314 deletions(-) create mode 100644 tests/graphs/abstract_graph_test.py diff --git a/requirements-dev.lock b/requirements-dev.lock index 64af8ee8..04ca69d9 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -15,8 +15,6 @@ aiohappyeyeballs==2.3.5 aiohttp==3.10.3 # via langchain # via langchain-community - # via langchain-fireworks - # via langchain-nvidia-ai-endpoints aiosignal==1.3.1 # via aiohttp alabaster==0.7.16 @@ -25,11 +23,7 @@ altair==5.4.0 # via streamlit annotated-types==0.7.0 # via pydantic -anthropic==0.33.0 - # via langchain-anthropic anyio==4.4.0 - # via anthropic - # via groq # via httpx # via openai # via starlette @@ -55,8 +49,6 @@ boto3==1.34.158 botocore==1.34.158 # via boto3 # via s3transfer -browserbase==0.3.0 - # via scrapegraphai burr==0.22.1 # via scrapegraphai cachetools==5.4.0 @@ -78,17 +70,11 @@ cycler==0.12.1 # via matplotlib dataclasses-json==0.6.7 # via langchain-community -defusedxml==0.7.1 - # via langchain-anthropic dill==0.3.8 # via multiprocess # via pylint distro==1.9.0 - # via anthropic - # via groq # via openai -docstring-parser==0.16 - # via google-cloud-aiplatform docutils==0.19 # via sphinx exceptiongroup==1.2.2 @@ -102,10 +88,6 @@ fastapi-pagination==0.12.26 # via burr filelock==3.15.4 # via huggingface-hub - # via torch - # via transformers -fireworks-ai==0.15.0 - # via langchain-fireworks fonttools==4.53.1 # via matplotlib free-proxy==1.1.1 @@ -115,7 +97,6 @@ frozenlist==1.4.1 # via aiosignal fsspec==2024.6.1 # via huggingface-hub - # via torch furo==2024.5.6 # via scrapegraphai gitdb==4.0.11 @@ -129,11 +110,6 @@ google-ai-generativelanguage==0.6.6 google-api-core==2.19.1 # via google-ai-generativelanguage # via google-api-python-client - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-api-python-client==2.140.0 # via google-generativeai @@ -142,52 +118,21 @@ google-auth==2.33.0 # via google-api-core # via google-api-python-client # via google-auth-httplib2 - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-auth-httplib2==0.2.0 # via google-api-python-client -google-cloud-aiplatform==1.61.0 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via google-cloud-bigquery - # via google-cloud-storage -google-cloud-resource-manager==1.12.5 - # via google-cloud-aiplatform -google-cloud-storage==2.18.2 - # via google-cloud-aiplatform - # via langchain-google-vertexai -google-crc32c==1.5.0 - # via google-cloud-storage - # via google-resumable-media google-generativeai==0.7.2 # via langchain-google-genai -google-resumable-media==2.7.2 - # via google-cloud-bigquery - # via google-cloud-storage googleapis-common-protos==1.63.2 # via google-api-core - # via grpc-google-iam-v1 # via grpcio-status graphviz==0.20.3 # via burr - # via scrapegraphai greenlet==3.0.3 # via playwright # via sqlalchemy -groq==0.9.0 - # via langchain-groq -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager grpcio==1.65.4 # via google-api-core - # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status grpcio-status==1.62.3 # via google-api-core @@ -202,20 +147,12 @@ httplib2==0.22.0 # via google-api-python-client # via google-auth-httplib2 httpx==0.27.0 - # via anthropic - # via browserbase - # via fireworks-ai - # via groq # via langchain-mistralai # via openai httpx-sse==0.4.0 - # via fireworks-ai # via langchain-mistralai huggingface-hub==0.24.5 - # via langchain-huggingface - # via sentence-transformers # via tokenizers - # via transformers idna==3.7 # via anyio # via httpx @@ -236,15 +173,11 @@ jinja2==3.1.4 # via burr # via pydeck # via sphinx - # via torch jiter==0.5.0 - # via anthropic # via openai jmespath==1.0.1 # via boto3 # via botocore -joblib==1.4.2 - # via scikit-learn jsonpatch==1.33 # via langchain-core jsonpointer==3.0.0 @@ -258,40 +191,22 @@ kiwisolver==1.4.5 langchain==0.2.14 # via langchain-community # via scrapegraphai -langchain-anthropic==0.1.22 - # via scrapegraphai langchain-aws==0.1.16 # via scrapegraphai langchain-community==0.2.11 # via scrapegraphai langchain-core==0.2.33 # via langchain - # via langchain-anthropic # via langchain-aws # via langchain-community - # via langchain-fireworks # via langchain-google-genai - # via langchain-google-vertexai - # via langchain-groq - # via langchain-huggingface # via langchain-mistralai - # via langchain-nvidia-ai-endpoints # via langchain-openai # via langchain-text-splitters -langchain-fireworks==0.1.7 - # via scrapegraphai langchain-google-genai==1.0.8 # via scrapegraphai -langchain-google-vertexai==1.0.8 - # via scrapegraphai -langchain-groq==0.1.9 - # via scrapegraphai -langchain-huggingface==0.0.3 - # via scrapegraphai langchain-mistralai==0.1.12 # via scrapegraphai -langchain-nvidia-ai-endpoints==0.2.1 - # via scrapegraphai langchain-openai==0.1.22 # via scrapegraphai langchain-text-splitters==0.2.2 @@ -320,8 +235,6 @@ minify-html==0.15.0 # via scrapegraphai mpire==2.10.2 # via semchunk -mpmath==1.3.0 - # via sympy multidict==6.0.5 # via aiohttp # via yarl @@ -331,8 +244,6 @@ mypy-extensions==1.0.0 # via typing-inspect narwhals==1.3.0 # via altair -networkx==3.2.1 - # via torch numpy==1.26.4 # via contourpy # via faiss-cpu @@ -343,24 +254,16 @@ numpy==1.26.4 # via pandas # via pyarrow # via pydeck - # via scikit-learn - # via scipy - # via sentence-transformers # via sf-hamilton - # via shapely # via streamlit - # via transformers openai==1.40.3 # via burr - # via langchain-fireworks # via langchain-openai orjson==3.10.7 # via langsmith packaging==24.1 # via altair # via faiss-cpu - # via google-cloud-aiplatform - # via google-cloud-bigquery # via huggingface-hub # via langchain-core # via marshmallow @@ -368,21 +271,16 @@ packaging==24.1 # via pytest # via sphinx # via streamlit - # via transformers pandas==2.2.2 # via scrapegraphai # via sf-hamilton # via streamlit pillow==10.4.0 - # via fireworks-ai - # via langchain-nvidia-ai-endpoints # via matplotlib - # via sentence-transformers # via streamlit platformdirs==4.2.2 # via pylint playwright==1.45.1 - # via browserbase # via scrapegraphai # via undetected-playwright pluggy==1.5.0 @@ -390,16 +288,11 @@ pluggy==1.5.0 proto-plus==1.24.0 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager protobuf==4.25.4 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager # via google-generativeai # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status # via proto-plus # via streamlit @@ -411,15 +304,10 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pydantic==2.8.2 - # via anthropic - # via browserbase # via burr # via fastapi # via fastapi-pagination - # via fireworks-ai - # via google-cloud-aiplatform # via google-generativeai - # via groq # via langchain # via langchain-core # via langsmith @@ -444,7 +332,6 @@ pytest==8.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 # via botocore - # via google-cloud-bigquery # via matplotlib # via pandas python-dotenv==1.0.1 @@ -456,28 +343,22 @@ pyyaml==6.0.2 # via langchain # via langchain-community # via langchain-core - # via transformers referencing==0.35.1 # via jsonschema # via jsonschema-specifications regex==2024.7.24 # via tiktoken - # via transformers requests==2.32.3 # via burr # via free-proxy # via google-api-core - # via google-cloud-bigquery - # via google-cloud-storage # via huggingface-hub # via langchain # via langchain-community - # via langchain-fireworks # via langsmith # via sphinx # via streamlit # via tiktoken - # via transformers rich==13.7.1 # via streamlit rpds-py==0.20.0 @@ -487,29 +368,16 @@ rsa==4.9 # via google-auth s3transfer==0.10.2 # via boto3 -safetensors==0.4.4 - # via transformers -scikit-learn==1.5.1 - # via sentence-transformers -scipy==1.13.1 - # via scikit-learn - # via sentence-transformers semchunk==2.2.0 # via scrapegraphai -sentence-transformers==3.0.1 - # via langchain-huggingface sf-hamilton==1.73.1 # via burr -shapely==2.0.5 - # via google-cloud-aiplatform six==1.16.0 # via python-dateutil smmap==5.0.1 # via gitdb sniffio==1.3.1 - # via anthropic # via anyio - # via groq # via httpx # via openai snowballstemmer==2.2.0 @@ -541,23 +409,16 @@ starlette==0.37.2 # via fastapi streamlit==1.37.1 # via burr -sympy==1.13.2 - # via torch tenacity==8.5.0 # via langchain # via langchain-community # via langchain-core # via streamlit -threadpoolctl==3.5.0 - # via scikit-learn tiktoken==0.7.0 # via langchain-openai # via scrapegraphai tokenizers==0.19.1 - # via anthropic - # via langchain-huggingface # via langchain-mistralai - # via transformers toml==0.10.2 # via streamlit tomli==2.0.1 @@ -565,8 +426,6 @@ tomli==2.0.1 # via pytest tomlkit==0.13.0 # via pylint -torch==2.2.2 - # via sentence-transformers tornado==6.4.1 # via streamlit tqdm==4.66.5 @@ -576,20 +435,13 @@ tqdm==4.66.5 # via openai # via scrapegraphai # via semchunk - # via sentence-transformers - # via transformers -transformers==4.44.0 - # via langchain-huggingface - # via sentence-transformers typing-extensions==4.12.2 # via altair - # via anthropic # via anyio # via astroid # via fastapi # via fastapi-pagination # via google-generativeai - # via groq # via huggingface-hub # via langchain-core # via openai @@ -601,7 +453,6 @@ typing-extensions==4.12.2 # via sqlalchemy # via starlette # via streamlit - # via torch # via typing-inspect # via uvicorn typing-inspect==0.9.0 diff --git a/requirements.lock b/requirements.lock index 1d80e1bf..f3cb5626 100644 --- a/requirements.lock +++ b/requirements.lock @@ -11,17 +11,11 @@ aiohttp==3.9.5 # via langchain # via langchain-community - # via langchain-fireworks - # via langchain-nvidia-ai-endpoints aiosignal==1.3.1 # via aiohttp annotated-types==0.7.0 # via pydantic -anthropic==0.31.2 - # via langchain-anthropic anyio==4.4.0 - # via anthropic - # via groq # via httpx # via openai async-timeout==4.0.3 @@ -37,8 +31,6 @@ boto3==1.34.146 botocore==1.34.146 # via boto3 # via s3transfer -browserbase==0.3.0 - # via scrapegraphai cachetools==5.4.0 # via google-auth certifi==2024.7.4 @@ -49,26 +41,16 @@ charset-normalizer==3.3.2 # via requests dataclasses-json==0.6.7 # via langchain-community -defusedxml==0.7.1 - # via langchain-anthropic dill==0.3.8 # via multiprocess distro==1.9.0 - # via anthropic - # via groq # via openai -docstring-parser==0.16 - # via google-cloud-aiplatform exceptiongroup==1.2.2 # via anyio faiss-cpu==1.8.0.post1 # via scrapegraphai filelock==3.15.4 # via huggingface-hub - # via torch - # via transformers -fireworks-ai==0.14.0 - # via langchain-fireworks free-proxy==1.1.1 # via scrapegraphai frozenlist==1.4.1 @@ -76,7 +58,6 @@ frozenlist==1.4.1 # via aiosignal fsspec==2024.6.1 # via huggingface-hub - # via torch google==3.0.0 # via scrapegraphai google-ai-generativelanguage==0.6.6 @@ -84,11 +65,6 @@ google-ai-generativelanguage==0.6.6 google-api-core==2.19.1 # via google-ai-generativelanguage # via google-api-python-client - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-api-python-client==2.137.0 # via google-generativeai @@ -97,51 +73,19 @@ google-auth==2.32.0 # via google-api-core # via google-api-python-client # via google-auth-httplib2 - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-auth-httplib2==0.2.0 # via google-api-python-client -google-cloud-aiplatform==1.59.0 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via google-cloud-bigquery - # via google-cloud-storage -google-cloud-resource-manager==1.12.4 - # via google-cloud-aiplatform -google-cloud-storage==2.18.0 - # via google-cloud-aiplatform - # via langchain-google-vertexai -google-crc32c==1.5.0 - # via google-cloud-storage - # via google-resumable-media google-generativeai==0.7.2 # via langchain-google-genai -google-resumable-media==2.7.1 - # via google-cloud-bigquery - # via google-cloud-storage googleapis-common-protos==1.63.2 # via google-api-core - # via grpc-google-iam-v1 # via grpcio-status -graphviz==0.20.3 - # via scrapegraphai greenlet==3.0.3 # via playwright # via sqlalchemy -groq==0.9.0 - # via langchain-groq -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager grpcio==1.65.1 # via google-api-core - # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status grpcio-status==1.62.2 # via google-api-core @@ -155,35 +99,22 @@ httplib2==0.22.0 # via google-api-python-client # via google-auth-httplib2 httpx==0.27.0 - # via anthropic - # via browserbase - # via fireworks-ai - # via groq # via langchain-mistralai # via openai httpx-sse==0.4.0 - # via fireworks-ai # via langchain-mistralai huggingface-hub==0.24.1 - # via langchain-huggingface - # via sentence-transformers # via tokenizers - # via transformers idna==3.7 # via anyio # via httpx # via requests # via yarl -jinja2==3.1.4 - # via torch jiter==0.5.0 - # via anthropic # via openai jmespath==1.0.1 # via boto3 # via botocore -joblib==1.4.2 - # via scikit-learn jsonpatch==1.33 # via langchain-core jsonpointer==3.0.0 @@ -191,40 +122,22 @@ jsonpointer==3.0.0 langchain==0.2.14 # via langchain-community # via scrapegraphai -langchain-anthropic==0.1.20 - # via scrapegraphai langchain-aws==0.1.12 # via scrapegraphai langchain-community==0.2.10 # via scrapegraphai langchain-core==0.2.33 # via langchain - # via langchain-anthropic # via langchain-aws # via langchain-community - # via langchain-fireworks # via langchain-google-genai - # via langchain-google-vertexai - # via langchain-groq - # via langchain-huggingface # via langchain-mistralai - # via langchain-nvidia-ai-endpoints # via langchain-openai # via langchain-text-splitters -langchain-fireworks==0.1.5 - # via scrapegraphai langchain-google-genai==1.0.8 # via scrapegraphai -langchain-google-vertexai==1.0.7 - # via scrapegraphai -langchain-groq==0.1.6 - # via scrapegraphai -langchain-huggingface==0.0.3 - # via scrapegraphai langchain-mistralai==0.1.12 # via scrapegraphai -langchain-nvidia-ai-endpoints==0.1.7 - # via scrapegraphai langchain-openai==0.1.22 # via scrapegraphai langchain-text-splitters==0.2.2 @@ -235,16 +148,12 @@ langsmith==0.1.93 # via langchain-core lxml==5.2.2 # via free-proxy -markupsafe==2.1.5 - # via jinja2 marshmallow==3.21.3 # via dataclasses-json minify-html==0.15.0 # via scrapegraphai mpire==2.10.2 # via semchunk -mpmath==1.3.0 - # via sympy multidict==6.0.5 # via aiohttp # via yarl @@ -252,55 +161,34 @@ multiprocess==0.70.16 # via mpire mypy-extensions==1.0.0 # via typing-inspect -networkx==3.2.1 - # via torch numpy==1.26.4 # via faiss-cpu # via langchain # via langchain-aws # via langchain-community # via pandas - # via scikit-learn - # via scipy - # via sentence-transformers - # via shapely - # via transformers openai==1.41.0 - # via langchain-fireworks # via langchain-openai orjson==3.10.6 # via langsmith packaging==24.1 # via faiss-cpu - # via google-cloud-aiplatform - # via google-cloud-bigquery # via huggingface-hub # via langchain-core # via marshmallow - # via transformers pandas==2.2.2 # via scrapegraphai -pillow==10.4.0 - # via fireworks-ai - # via langchain-nvidia-ai-endpoints - # via sentence-transformers playwright==1.45.1 - # via browserbase # via scrapegraphai # via undetected-playwright proto-plus==1.24.0 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager protobuf==4.25.3 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager # via google-generativeai # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status # via proto-plus pyasn1==0.6.0 @@ -309,12 +197,7 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pydantic==2.8.2 - # via anthropic - # via browserbase - # via fireworks-ai - # via google-cloud-aiplatform # via google-generativeai - # via groq # via langchain # via langchain-core # via langsmith @@ -329,7 +212,6 @@ pyparsing==3.1.2 # via httplib2 python-dateutil==2.9.0.post0 # via botocore - # via google-cloud-bigquery # via pandas python-dotenv==1.0.1 # via scrapegraphai @@ -340,45 +222,26 @@ pyyaml==6.0.1 # via langchain # via langchain-community # via langchain-core - # via transformers regex==2024.5.15 # via tiktoken - # via transformers requests==2.32.3 # via free-proxy # via google-api-core - # via google-cloud-bigquery - # via google-cloud-storage # via huggingface-hub # via langchain # via langchain-community - # via langchain-fireworks # via langsmith # via tiktoken - # via transformers rsa==4.9 # via google-auth s3transfer==0.10.2 # via boto3 -safetensors==0.4.3 - # via transformers -scikit-learn==1.5.1 - # via sentence-transformers -scipy==1.13.1 - # via scikit-learn - # via sentence-transformers semchunk==2.2.0 # via scrapegraphai -sentence-transformers==3.0.1 - # via langchain-huggingface -shapely==2.0.5 - # via google-cloud-aiplatform six==1.16.0 # via python-dateutil sniffio==1.3.1 - # via anthropic # via anyio - # via groq # via httpx # via openai soupsieve==2.5 @@ -386,24 +249,15 @@ soupsieve==2.5 sqlalchemy==2.0.31 # via langchain # via langchain-community -sympy==1.13.1 - # via torch tenacity==8.5.0 # via langchain # via langchain-community # via langchain-core -threadpoolctl==3.5.0 - # via scikit-learn tiktoken==0.7.0 # via langchain-openai # via scrapegraphai tokenizers==0.19.1 - # via anthropic - # via langchain-huggingface # via langchain-mistralai - # via transformers -torch==2.2.2 - # via sentence-transformers tqdm==4.66.4 # via google-generativeai # via huggingface-hub @@ -411,16 +265,9 @@ tqdm==4.66.4 # via openai # via scrapegraphai # via semchunk - # via sentence-transformers - # via transformers -transformers==4.43.3 - # via langchain-huggingface - # via sentence-transformers typing-extensions==4.12.2 - # via anthropic # via anyio # via google-generativeai - # via groq # via huggingface-hub # via langchain-core # via openai @@ -428,7 +275,6 @@ typing-extensions==4.12.2 # via pydantic-core # via pyee # via sqlalchemy - # via torch # via typing-inspect typing-inspect==0.9.0 # via dataclasses-json diff --git a/requirements.txt b/requirements.txt index 21c2fd3b..80cb0767 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,9 @@ langchain>=0.2.14 -langchain-fireworks>=0.1.3 -langchain_community>=0.2.9 langchain-google-genai>=1.0.7 -langchain-google-vertexai>=1.0.7 langchain-openai>=0.1.22 -langchain-groq>=0.1.3 -langchain-aws>=0.1.3 -langchain-anthropic>=0.1.11 langchain-mistralai>=0.1.12 -langchain-huggingface>=0.0.3 -langchain-nvidia-ai-endpoints>=0.1.6 +langchain_community>=0.2.9 +langchain-aws>=0.1.3 html2text>=2024.2.26 faiss-cpu>=1.8.0 beautifulsoup4>=4.12.3 @@ -17,11 +11,9 @@ pandas>=2.2.2 python-dotenv>=1.0.1 tiktoken>=0.7 tqdm>=4.66.4 -graphviz>=0.20.3 minify-html>=0.15.0 free-proxy>=1.1.1 playwright>=1.43.0 -google>=3.0.0 undetected-playwright>=0.3.0 +google>=3.0.0 semchunk>=1.0.1 -browserbase>=0.3.0 diff --git a/tests/graphs/abstract_graph_test.py b/tests/graphs/abstract_graph_test.py new file mode 100644 index 00000000..805a1691 --- /dev/null +++ b/tests/graphs/abstract_graph_test.py @@ -0,0 +1,31 @@ +""" +Tests for the AbstractGraph. +""" +import pytest +from unittest.mock import patch +from scrapegraphai.graphs import AbstractGraph + +class TestAbstractGraph: + @pytest.mark.parametrize("llm_config, expected_model", [ + ({"model": "openai/gpt-3.5-turbo"}, "ChatOpenAI"), + ({"model": "azure_openai/gpt-3.5-turbo"}, "AzureChatOpenAI"), + ({"model": "google_genai/gemini-pro"}, "ChatGoogleGenerativeAI"), + ({"model": "google_vertexai/chat-bison"}, "ChatVertexAI"), + ({"model": "ollama/llama2"}, "Ollama"), + ({"model": "oneapi/text-davinci-003"}, "OneApi"), + ({"model": "nvidia/clara-instant-1-base"}, "ChatNVIDIA"), + ({"model": "deepseek/deepseek-coder-6.7b-instruct"}, "DeepSeek"), + ({"model": "ernie/ernie-bot"}, "ErnieBotChat"), + ]) + def test_create_llm(self, llm_config, expected_model): + graph = AbstractGraph("Test prompt", {"llm": llm_config}) + assert isinstance(graph.llm_model, expected_model) + + def test_create_llm_unknown_provider(self): + with pytest.raises(ValueError): + AbstractGraph("Test prompt", {"llm": {"model": "unknown_provider/model"}}) + + def test_create_llm_error(self): + with patch("your_module.init_chat_model", side_effect=Exception("Test error")): + with pytest.raises(Exception): + AbstractGraph("Test prompt", {"llm": {"model": "openai/gpt-3.5-turbo"}}) From 5c16ee985b11948c6a8c1dbfd051d458fa193973 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 27 Aug 2024 16:44:05 +0200 Subject: [PATCH 09/21] fix(docloaders): BrowserBase dynamic import --- scrapegraphai/docloaders/browser_base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scrapegraphai/docloaders/browser_base.py b/scrapegraphai/docloaders/browser_base.py index 9b60f36f..318c9f38 100644 --- a/scrapegraphai/docloaders/browser_base.py +++ b/scrapegraphai/docloaders/browser_base.py @@ -2,7 +2,6 @@ browserbase integration module """ from typing import List -from browserbase import Browserbase def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[str]: """ @@ -41,6 +40,12 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s object: The result of the loading operation. """ + try: + from browserbase import Browserbase + except ImportError: + raise ImportError("The browserbase module is not installed. Please install it using `pip install browserbase`.") + + browserbase = Browserbase(api_key=api_key, project_id=project_id) result = [] From 83e71df2e2cb3b6bfba11f8879d5c4917a3e1837 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 27 Aug 2024 16:54:57 +0200 Subject: [PATCH 10/21] fix: set up dynamic imports correctly --- pyproject.toml | 1 + requirements-dev.lock | 149 ------------------- requirements.lock | 154 -------------------- requirements.txt | 14 +- scrapegraphai/nodes/fetch_node.py | 5 +- scrapegraphai/nodes/generate_answer_node.py | 21 ++- scrapegraphai/nodes/rag_node.py | 70 ++++----- 7 files changed, 56 insertions(+), 358 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3616c032..5afe841f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ docs = ["sphinx==6.0", "furo==2024.5.6"] # Group 1: Other Language Models other-language-models = [ + "langchain-google-vertexai>=1.0.7", "langchain-fireworks>=0.1.3", "langchain-groq>=0.1.3", "langchain-anthropic>=0.1.11", diff --git a/requirements-dev.lock b/requirements-dev.lock index 64af8ee8..04ca69d9 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -15,8 +15,6 @@ aiohappyeyeballs==2.3.5 aiohttp==3.10.3 # via langchain # via langchain-community - # via langchain-fireworks - # via langchain-nvidia-ai-endpoints aiosignal==1.3.1 # via aiohttp alabaster==0.7.16 @@ -25,11 +23,7 @@ altair==5.4.0 # via streamlit annotated-types==0.7.0 # via pydantic -anthropic==0.33.0 - # via langchain-anthropic anyio==4.4.0 - # via anthropic - # via groq # via httpx # via openai # via starlette @@ -55,8 +49,6 @@ boto3==1.34.158 botocore==1.34.158 # via boto3 # via s3transfer -browserbase==0.3.0 - # via scrapegraphai burr==0.22.1 # via scrapegraphai cachetools==5.4.0 @@ -78,17 +70,11 @@ cycler==0.12.1 # via matplotlib dataclasses-json==0.6.7 # via langchain-community -defusedxml==0.7.1 - # via langchain-anthropic dill==0.3.8 # via multiprocess # via pylint distro==1.9.0 - # via anthropic - # via groq # via openai -docstring-parser==0.16 - # via google-cloud-aiplatform docutils==0.19 # via sphinx exceptiongroup==1.2.2 @@ -102,10 +88,6 @@ fastapi-pagination==0.12.26 # via burr filelock==3.15.4 # via huggingface-hub - # via torch - # via transformers -fireworks-ai==0.15.0 - # via langchain-fireworks fonttools==4.53.1 # via matplotlib free-proxy==1.1.1 @@ -115,7 +97,6 @@ frozenlist==1.4.1 # via aiosignal fsspec==2024.6.1 # via huggingface-hub - # via torch furo==2024.5.6 # via scrapegraphai gitdb==4.0.11 @@ -129,11 +110,6 @@ google-ai-generativelanguage==0.6.6 google-api-core==2.19.1 # via google-ai-generativelanguage # via google-api-python-client - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-api-python-client==2.140.0 # via google-generativeai @@ -142,52 +118,21 @@ google-auth==2.33.0 # via google-api-core # via google-api-python-client # via google-auth-httplib2 - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-auth-httplib2==0.2.0 # via google-api-python-client -google-cloud-aiplatform==1.61.0 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via google-cloud-bigquery - # via google-cloud-storage -google-cloud-resource-manager==1.12.5 - # via google-cloud-aiplatform -google-cloud-storage==2.18.2 - # via google-cloud-aiplatform - # via langchain-google-vertexai -google-crc32c==1.5.0 - # via google-cloud-storage - # via google-resumable-media google-generativeai==0.7.2 # via langchain-google-genai -google-resumable-media==2.7.2 - # via google-cloud-bigquery - # via google-cloud-storage googleapis-common-protos==1.63.2 # via google-api-core - # via grpc-google-iam-v1 # via grpcio-status graphviz==0.20.3 # via burr - # via scrapegraphai greenlet==3.0.3 # via playwright # via sqlalchemy -groq==0.9.0 - # via langchain-groq -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager grpcio==1.65.4 # via google-api-core - # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status grpcio-status==1.62.3 # via google-api-core @@ -202,20 +147,12 @@ httplib2==0.22.0 # via google-api-python-client # via google-auth-httplib2 httpx==0.27.0 - # via anthropic - # via browserbase - # via fireworks-ai - # via groq # via langchain-mistralai # via openai httpx-sse==0.4.0 - # via fireworks-ai # via langchain-mistralai huggingface-hub==0.24.5 - # via langchain-huggingface - # via sentence-transformers # via tokenizers - # via transformers idna==3.7 # via anyio # via httpx @@ -236,15 +173,11 @@ jinja2==3.1.4 # via burr # via pydeck # via sphinx - # via torch jiter==0.5.0 - # via anthropic # via openai jmespath==1.0.1 # via boto3 # via botocore -joblib==1.4.2 - # via scikit-learn jsonpatch==1.33 # via langchain-core jsonpointer==3.0.0 @@ -258,40 +191,22 @@ kiwisolver==1.4.5 langchain==0.2.14 # via langchain-community # via scrapegraphai -langchain-anthropic==0.1.22 - # via scrapegraphai langchain-aws==0.1.16 # via scrapegraphai langchain-community==0.2.11 # via scrapegraphai langchain-core==0.2.33 # via langchain - # via langchain-anthropic # via langchain-aws # via langchain-community - # via langchain-fireworks # via langchain-google-genai - # via langchain-google-vertexai - # via langchain-groq - # via langchain-huggingface # via langchain-mistralai - # via langchain-nvidia-ai-endpoints # via langchain-openai # via langchain-text-splitters -langchain-fireworks==0.1.7 - # via scrapegraphai langchain-google-genai==1.0.8 # via scrapegraphai -langchain-google-vertexai==1.0.8 - # via scrapegraphai -langchain-groq==0.1.9 - # via scrapegraphai -langchain-huggingface==0.0.3 - # via scrapegraphai langchain-mistralai==0.1.12 # via scrapegraphai -langchain-nvidia-ai-endpoints==0.2.1 - # via scrapegraphai langchain-openai==0.1.22 # via scrapegraphai langchain-text-splitters==0.2.2 @@ -320,8 +235,6 @@ minify-html==0.15.0 # via scrapegraphai mpire==2.10.2 # via semchunk -mpmath==1.3.0 - # via sympy multidict==6.0.5 # via aiohttp # via yarl @@ -331,8 +244,6 @@ mypy-extensions==1.0.0 # via typing-inspect narwhals==1.3.0 # via altair -networkx==3.2.1 - # via torch numpy==1.26.4 # via contourpy # via faiss-cpu @@ -343,24 +254,16 @@ numpy==1.26.4 # via pandas # via pyarrow # via pydeck - # via scikit-learn - # via scipy - # via sentence-transformers # via sf-hamilton - # via shapely # via streamlit - # via transformers openai==1.40.3 # via burr - # via langchain-fireworks # via langchain-openai orjson==3.10.7 # via langsmith packaging==24.1 # via altair # via faiss-cpu - # via google-cloud-aiplatform - # via google-cloud-bigquery # via huggingface-hub # via langchain-core # via marshmallow @@ -368,21 +271,16 @@ packaging==24.1 # via pytest # via sphinx # via streamlit - # via transformers pandas==2.2.2 # via scrapegraphai # via sf-hamilton # via streamlit pillow==10.4.0 - # via fireworks-ai - # via langchain-nvidia-ai-endpoints # via matplotlib - # via sentence-transformers # via streamlit platformdirs==4.2.2 # via pylint playwright==1.45.1 - # via browserbase # via scrapegraphai # via undetected-playwright pluggy==1.5.0 @@ -390,16 +288,11 @@ pluggy==1.5.0 proto-plus==1.24.0 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager protobuf==4.25.4 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager # via google-generativeai # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status # via proto-plus # via streamlit @@ -411,15 +304,10 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pydantic==2.8.2 - # via anthropic - # via browserbase # via burr # via fastapi # via fastapi-pagination - # via fireworks-ai - # via google-cloud-aiplatform # via google-generativeai - # via groq # via langchain # via langchain-core # via langsmith @@ -444,7 +332,6 @@ pytest==8.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 # via botocore - # via google-cloud-bigquery # via matplotlib # via pandas python-dotenv==1.0.1 @@ -456,28 +343,22 @@ pyyaml==6.0.2 # via langchain # via langchain-community # via langchain-core - # via transformers referencing==0.35.1 # via jsonschema # via jsonschema-specifications regex==2024.7.24 # via tiktoken - # via transformers requests==2.32.3 # via burr # via free-proxy # via google-api-core - # via google-cloud-bigquery - # via google-cloud-storage # via huggingface-hub # via langchain # via langchain-community - # via langchain-fireworks # via langsmith # via sphinx # via streamlit # via tiktoken - # via transformers rich==13.7.1 # via streamlit rpds-py==0.20.0 @@ -487,29 +368,16 @@ rsa==4.9 # via google-auth s3transfer==0.10.2 # via boto3 -safetensors==0.4.4 - # via transformers -scikit-learn==1.5.1 - # via sentence-transformers -scipy==1.13.1 - # via scikit-learn - # via sentence-transformers semchunk==2.2.0 # via scrapegraphai -sentence-transformers==3.0.1 - # via langchain-huggingface sf-hamilton==1.73.1 # via burr -shapely==2.0.5 - # via google-cloud-aiplatform six==1.16.0 # via python-dateutil smmap==5.0.1 # via gitdb sniffio==1.3.1 - # via anthropic # via anyio - # via groq # via httpx # via openai snowballstemmer==2.2.0 @@ -541,23 +409,16 @@ starlette==0.37.2 # via fastapi streamlit==1.37.1 # via burr -sympy==1.13.2 - # via torch tenacity==8.5.0 # via langchain # via langchain-community # via langchain-core # via streamlit -threadpoolctl==3.5.0 - # via scikit-learn tiktoken==0.7.0 # via langchain-openai # via scrapegraphai tokenizers==0.19.1 - # via anthropic - # via langchain-huggingface # via langchain-mistralai - # via transformers toml==0.10.2 # via streamlit tomli==2.0.1 @@ -565,8 +426,6 @@ tomli==2.0.1 # via pytest tomlkit==0.13.0 # via pylint -torch==2.2.2 - # via sentence-transformers tornado==6.4.1 # via streamlit tqdm==4.66.5 @@ -576,20 +435,13 @@ tqdm==4.66.5 # via openai # via scrapegraphai # via semchunk - # via sentence-transformers - # via transformers -transformers==4.44.0 - # via langchain-huggingface - # via sentence-transformers typing-extensions==4.12.2 # via altair - # via anthropic # via anyio # via astroid # via fastapi # via fastapi-pagination # via google-generativeai - # via groq # via huggingface-hub # via langchain-core # via openai @@ -601,7 +453,6 @@ typing-extensions==4.12.2 # via sqlalchemy # via starlette # via streamlit - # via torch # via typing-inspect # via uvicorn typing-inspect==0.9.0 diff --git a/requirements.lock b/requirements.lock index 1d80e1bf..f3cb5626 100644 --- a/requirements.lock +++ b/requirements.lock @@ -11,17 +11,11 @@ aiohttp==3.9.5 # via langchain # via langchain-community - # via langchain-fireworks - # via langchain-nvidia-ai-endpoints aiosignal==1.3.1 # via aiohttp annotated-types==0.7.0 # via pydantic -anthropic==0.31.2 - # via langchain-anthropic anyio==4.4.0 - # via anthropic - # via groq # via httpx # via openai async-timeout==4.0.3 @@ -37,8 +31,6 @@ boto3==1.34.146 botocore==1.34.146 # via boto3 # via s3transfer -browserbase==0.3.0 - # via scrapegraphai cachetools==5.4.0 # via google-auth certifi==2024.7.4 @@ -49,26 +41,16 @@ charset-normalizer==3.3.2 # via requests dataclasses-json==0.6.7 # via langchain-community -defusedxml==0.7.1 - # via langchain-anthropic dill==0.3.8 # via multiprocess distro==1.9.0 - # via anthropic - # via groq # via openai -docstring-parser==0.16 - # via google-cloud-aiplatform exceptiongroup==1.2.2 # via anyio faiss-cpu==1.8.0.post1 # via scrapegraphai filelock==3.15.4 # via huggingface-hub - # via torch - # via transformers -fireworks-ai==0.14.0 - # via langchain-fireworks free-proxy==1.1.1 # via scrapegraphai frozenlist==1.4.1 @@ -76,7 +58,6 @@ frozenlist==1.4.1 # via aiosignal fsspec==2024.6.1 # via huggingface-hub - # via torch google==3.0.0 # via scrapegraphai google-ai-generativelanguage==0.6.6 @@ -84,11 +65,6 @@ google-ai-generativelanguage==0.6.6 google-api-core==2.19.1 # via google-ai-generativelanguage # via google-api-python-client - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-api-python-client==2.137.0 # via google-generativeai @@ -97,51 +73,19 @@ google-auth==2.32.0 # via google-api-core # via google-api-python-client # via google-auth-httplib2 - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-auth-httplib2==0.2.0 # via google-api-python-client -google-cloud-aiplatform==1.59.0 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via google-cloud-bigquery - # via google-cloud-storage -google-cloud-resource-manager==1.12.4 - # via google-cloud-aiplatform -google-cloud-storage==2.18.0 - # via google-cloud-aiplatform - # via langchain-google-vertexai -google-crc32c==1.5.0 - # via google-cloud-storage - # via google-resumable-media google-generativeai==0.7.2 # via langchain-google-genai -google-resumable-media==2.7.1 - # via google-cloud-bigquery - # via google-cloud-storage googleapis-common-protos==1.63.2 # via google-api-core - # via grpc-google-iam-v1 # via grpcio-status -graphviz==0.20.3 - # via scrapegraphai greenlet==3.0.3 # via playwright # via sqlalchemy -groq==0.9.0 - # via langchain-groq -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager grpcio==1.65.1 # via google-api-core - # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status grpcio-status==1.62.2 # via google-api-core @@ -155,35 +99,22 @@ httplib2==0.22.0 # via google-api-python-client # via google-auth-httplib2 httpx==0.27.0 - # via anthropic - # via browserbase - # via fireworks-ai - # via groq # via langchain-mistralai # via openai httpx-sse==0.4.0 - # via fireworks-ai # via langchain-mistralai huggingface-hub==0.24.1 - # via langchain-huggingface - # via sentence-transformers # via tokenizers - # via transformers idna==3.7 # via anyio # via httpx # via requests # via yarl -jinja2==3.1.4 - # via torch jiter==0.5.0 - # via anthropic # via openai jmespath==1.0.1 # via boto3 # via botocore -joblib==1.4.2 - # via scikit-learn jsonpatch==1.33 # via langchain-core jsonpointer==3.0.0 @@ -191,40 +122,22 @@ jsonpointer==3.0.0 langchain==0.2.14 # via langchain-community # via scrapegraphai -langchain-anthropic==0.1.20 - # via scrapegraphai langchain-aws==0.1.12 # via scrapegraphai langchain-community==0.2.10 # via scrapegraphai langchain-core==0.2.33 # via langchain - # via langchain-anthropic # via langchain-aws # via langchain-community - # via langchain-fireworks # via langchain-google-genai - # via langchain-google-vertexai - # via langchain-groq - # via langchain-huggingface # via langchain-mistralai - # via langchain-nvidia-ai-endpoints # via langchain-openai # via langchain-text-splitters -langchain-fireworks==0.1.5 - # via scrapegraphai langchain-google-genai==1.0.8 # via scrapegraphai -langchain-google-vertexai==1.0.7 - # via scrapegraphai -langchain-groq==0.1.6 - # via scrapegraphai -langchain-huggingface==0.0.3 - # via scrapegraphai langchain-mistralai==0.1.12 # via scrapegraphai -langchain-nvidia-ai-endpoints==0.1.7 - # via scrapegraphai langchain-openai==0.1.22 # via scrapegraphai langchain-text-splitters==0.2.2 @@ -235,16 +148,12 @@ langsmith==0.1.93 # via langchain-core lxml==5.2.2 # via free-proxy -markupsafe==2.1.5 - # via jinja2 marshmallow==3.21.3 # via dataclasses-json minify-html==0.15.0 # via scrapegraphai mpire==2.10.2 # via semchunk -mpmath==1.3.0 - # via sympy multidict==6.0.5 # via aiohttp # via yarl @@ -252,55 +161,34 @@ multiprocess==0.70.16 # via mpire mypy-extensions==1.0.0 # via typing-inspect -networkx==3.2.1 - # via torch numpy==1.26.4 # via faiss-cpu # via langchain # via langchain-aws # via langchain-community # via pandas - # via scikit-learn - # via scipy - # via sentence-transformers - # via shapely - # via transformers openai==1.41.0 - # via langchain-fireworks # via langchain-openai orjson==3.10.6 # via langsmith packaging==24.1 # via faiss-cpu - # via google-cloud-aiplatform - # via google-cloud-bigquery # via huggingface-hub # via langchain-core # via marshmallow - # via transformers pandas==2.2.2 # via scrapegraphai -pillow==10.4.0 - # via fireworks-ai - # via langchain-nvidia-ai-endpoints - # via sentence-transformers playwright==1.45.1 - # via browserbase # via scrapegraphai # via undetected-playwright proto-plus==1.24.0 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager protobuf==4.25.3 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager # via google-generativeai # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status # via proto-plus pyasn1==0.6.0 @@ -309,12 +197,7 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pydantic==2.8.2 - # via anthropic - # via browserbase - # via fireworks-ai - # via google-cloud-aiplatform # via google-generativeai - # via groq # via langchain # via langchain-core # via langsmith @@ -329,7 +212,6 @@ pyparsing==3.1.2 # via httplib2 python-dateutil==2.9.0.post0 # via botocore - # via google-cloud-bigquery # via pandas python-dotenv==1.0.1 # via scrapegraphai @@ -340,45 +222,26 @@ pyyaml==6.0.1 # via langchain # via langchain-community # via langchain-core - # via transformers regex==2024.5.15 # via tiktoken - # via transformers requests==2.32.3 # via free-proxy # via google-api-core - # via google-cloud-bigquery - # via google-cloud-storage # via huggingface-hub # via langchain # via langchain-community - # via langchain-fireworks # via langsmith # via tiktoken - # via transformers rsa==4.9 # via google-auth s3transfer==0.10.2 # via boto3 -safetensors==0.4.3 - # via transformers -scikit-learn==1.5.1 - # via sentence-transformers -scipy==1.13.1 - # via scikit-learn - # via sentence-transformers semchunk==2.2.0 # via scrapegraphai -sentence-transformers==3.0.1 - # via langchain-huggingface -shapely==2.0.5 - # via google-cloud-aiplatform six==1.16.0 # via python-dateutil sniffio==1.3.1 - # via anthropic # via anyio - # via groq # via httpx # via openai soupsieve==2.5 @@ -386,24 +249,15 @@ soupsieve==2.5 sqlalchemy==2.0.31 # via langchain # via langchain-community -sympy==1.13.1 - # via torch tenacity==8.5.0 # via langchain # via langchain-community # via langchain-core -threadpoolctl==3.5.0 - # via scikit-learn tiktoken==0.7.0 # via langchain-openai # via scrapegraphai tokenizers==0.19.1 - # via anthropic - # via langchain-huggingface # via langchain-mistralai - # via transformers -torch==2.2.2 - # via sentence-transformers tqdm==4.66.4 # via google-generativeai # via huggingface-hub @@ -411,16 +265,9 @@ tqdm==4.66.4 # via openai # via scrapegraphai # via semchunk - # via sentence-transformers - # via transformers -transformers==4.43.3 - # via langchain-huggingface - # via sentence-transformers typing-extensions==4.12.2 - # via anthropic # via anyio # via google-generativeai - # via groq # via huggingface-hub # via langchain-core # via openai @@ -428,7 +275,6 @@ typing-extensions==4.12.2 # via pydantic-core # via pyee # via sqlalchemy - # via torch # via typing-inspect typing-inspect==0.9.0 # via dataclasses-json diff --git a/requirements.txt b/requirements.txt index 21c2fd3b..80cb0767 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,9 @@ langchain>=0.2.14 -langchain-fireworks>=0.1.3 -langchain_community>=0.2.9 langchain-google-genai>=1.0.7 -langchain-google-vertexai>=1.0.7 langchain-openai>=0.1.22 -langchain-groq>=0.1.3 -langchain-aws>=0.1.3 -langchain-anthropic>=0.1.11 langchain-mistralai>=0.1.12 -langchain-huggingface>=0.0.3 -langchain-nvidia-ai-endpoints>=0.1.6 +langchain_community>=0.2.9 +langchain-aws>=0.1.3 html2text>=2024.2.26 faiss-cpu>=1.8.0 beautifulsoup4>=4.12.3 @@ -17,11 +11,9 @@ pandas>=2.2.2 python-dotenv>=1.0.1 tiktoken>=0.7 tqdm>=4.66.4 -graphviz>=0.20.3 minify-html>=0.15.0 free-proxy>=1.1.1 playwright>=1.43.0 -google>=3.0.0 undetected-playwright>=0.3.0 +google>=3.0.0 semchunk>=1.0.1 -browserbase>=0.3.0 diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 4119ee9a..f015278d 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -268,7 +268,10 @@ def handle_web_source(self, state, source): loader_kwargs = self.node_config.get("loader_kwargs", {}) if self.browser_base is not None: - from ..docloaders.browser_base import browser_base_fetch + try: + from ..docloaders.browser_base import browser_base_fetch + except ImportError: + raise ImportError("The browserbase module is not installed. Please install it using `pip install browserbase`.") data = browser_base_fetch(self.browser_base.get("api_key"), self.browser_base.get("project_id"), [source]) diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index 970a6790..966a758f 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -1,16 +1,13 @@ """ GenerateAnswerNode Module """ +from sys import modules from typing import List, Optional from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel from langchain_openai import ChatOpenAI, AzureChatOpenAI from langchain_mistralai import ChatMistralAI -from langchain_anthropic import ChatAnthropic -from langchain_groq import ChatGroq -from langchain_fireworks import ChatFireworks -from langchain_google_vertexai import ChatVertexAI from langchain_community.chat_models import ChatOllama from tqdm import tqdm from ..utils.logging import get_logger @@ -95,10 +92,18 @@ def execute(self, state: dict) -> dict: output_parser = JsonOutputParser(pydantic_object=self.node_config["schema"]) # Use built-in structured output for providers that allow it - if isinstance(self.llm_model, (ChatOpenAI, ChatMistralAI, ChatAnthropic, ChatFireworks, ChatGroq, ChatVertexAI)): - self.llm_model = self.llm_model.with_structured_output( - schema = self.node_config["schema"], - method="json_schema") + optional_modules = {"langchain_anthropic", "langchain_fireworks", "langchain_groq", "langchain_google_vertexai"} + if all(key in modules for key in optional_modules): + if isinstance(self.llm_model, (ChatOpenAI, ChatMistralAI, ChatAnthropic, ChatFireworks, ChatGroq, ChatVertexAI)): + self.llm_model = self.llm_model.with_structured_output( + schema = self.node_config["schema"], + method="json_schema") + else: + if isinstance(self.llm_model, (ChatOpenAI, ChatMistralAI)): + self.llm_model = self.llm_model.with_structured_output( + schema = self.node_config["schema"], + method="json_schema") + else: output_parser = JsonOutputParser() diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py index 868044a0..974fa772 100644 --- a/scrapegraphai/nodes/rag_node.py +++ b/scrapegraphai/nodes/rag_node.py @@ -2,6 +2,7 @@ RAGNode Module """ import os +import sys from typing import List, Optional from langchain.docstore.document import Document from langchain.retrievers import ContextualCompressionRetriever @@ -13,18 +14,15 @@ from langchain_community.vectorstores import FAISS from langchain_community.chat_models import ChatOllama from langchain_aws import BedrockEmbeddings, ChatBedrock -from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings from langchain_community.embeddings import OllamaEmbeddings from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI -from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings -from langchain_fireworks import FireworksEmbeddings, ChatFireworks from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings, ChatOpenAI, AzureChatOpenAI -from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA from ..utils.logging import get_logger from .base_node import BaseNode from ..helpers import models_tokens from ..models import DeepSeek +optional_modules = {"langchain_anthropic", "langchain_fireworks", "langchain_groq", "langchain_google_vertexai"} class RAGNode(BaseNode): """ @@ -163,6 +161,7 @@ def _create_default_embedder(self, llm_config=None) -> object: Raises: ValueError: If the model is not supported. """ + if isinstance(self.llm_model, ChatGoogleGenerativeAI): return GoogleGenerativeAIEmbeddings( google_api_key=llm_config["api_key"], model="models/embedding-001" @@ -172,28 +171,28 @@ def _create_default_embedder(self, llm_config=None) -> object: base_url=self.llm_model.openai_api_base) elif isinstance(self.llm_model, DeepSeek): return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key) - elif isinstance(self.llm_model, ChatVertexAI): - return VertexAIEmbeddings() elif isinstance(self.llm_model, AzureOpenAIEmbeddings): return self.llm_model elif isinstance(self.llm_model, AzureChatOpenAI): return AzureOpenAIEmbeddings() - elif isinstance(self.llm_model, ChatFireworks): - return FireworksEmbeddings(model=self.llm_model.model_name) - elif isinstance(self.llm_model, ChatNVIDIA): - return NVIDIAEmbeddings(model=self.llm_model.model_name) elif isinstance(self.llm_model, ChatOllama): # unwrap the kwargs from the model whihc is a dict params = self.llm_model._lc_kwargs # remove streaming and temperature params.pop("streaming", None) params.pop("temperature", None) - return OllamaEmbeddings(**params) - elif isinstance(self.llm_model, ChatHuggingFace): - return HuggingFaceEmbeddings(model=self.llm_model.model) elif isinstance(self.llm_model, ChatBedrock): return BedrockEmbeddings(client=None, model_id=self.llm_model.model_id) + elif all(key in sys.modules for key in optional_modules): + if isinstance(self.llm_model, ChatFireworks): + return FireworksEmbeddings(model=self.llm_model.model_name) + if isinstance(self.llm_model, ChatNVIDIA): + return NVIDIAEmbeddings(model=self.llm_model.model_name) + if isinstance(self.llm_model, ChatHuggingFace): + return HuggingFaceEmbeddings(model=self.llm_model.model) + if isinstance(self.llm_model, ChatVertexAI): + return VertexAIEmbeddings() else: raise ValueError("Embedding Model missing or not supported") @@ -218,14 +217,6 @@ def _create_embedder(self, embedder_config: dict) -> object: return OpenAIEmbeddings(api_key=embedder_params["api_key"]) if "azure" in embedder_params["model"]: return AzureOpenAIEmbeddings() - if "nvidia" in embedder_params["model"]: - embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) - try: - models_tokens["nvidia"][embedder_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - return NVIDIAEmbeddings(model=embedder_params["model"], - nvidia_api_key=embedder_params["api_key"]) if "ollama" in embedder_params["model"]: embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) try: @@ -233,20 +224,6 @@ def _create_embedder(self, embedder_config: dict) -> object: except KeyError as exc: raise KeyError("Model not supported") from exc return OllamaEmbeddings(**embedder_params) - if "hugging_face" in embedder_params["model"]: - embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) - try: - models_tokens["hugging_face"][embedder_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - return HuggingFaceEmbeddings(model=embedder_params["model"]) - if "fireworks" in embedder_params["model"]: - embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) - try: - models_tokens["fireworks"][embedder_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - return FireworksEmbeddings(model=embedder_params["model"]) if "gemini" in embedder_params["model"]: try: models_tokens["gemini"][embedder_params["model"]] @@ -261,5 +238,28 @@ def _create_embedder(self, embedder_config: dict) -> object: except KeyError as exc: raise KeyError("Model not supported") from exc return BedrockEmbeddings(client=client, model_id=embedder_params["model"]) + if all(key in sys.modules for key in optional_modules): + if "hugging_face" in embedder_params["model"]: + embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) + try: + models_tokens["hugging_face"][embedder_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return HuggingFaceEmbeddings(model=embedder_params["model"]) + if "fireworks" in embedder_params["model"]: + embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) + try: + models_tokens["fireworks"][embedder_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return FireworksEmbeddings(model=embedder_params["model"]) + if "nvidia" in embedder_params["model"]: + embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) + try: + models_tokens["nvidia"][embedder_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return NVIDIAEmbeddings(model=embedder_params["model"], + nvidia_api_key=embedder_params["api_key"]) raise ValueError("Model provided by the configuration not supported") From 7789663338a89d27fde322ae282ce07ccca16845 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 27 Aug 2024 17:17:51 +0200 Subject: [PATCH 11/21] fix(BurrBrige): dynamic imports --- scrapegraphai/integrations/burr_bridge.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scrapegraphai/integrations/burr_bridge.py b/scrapegraphai/integrations/burr_bridge.py index d1fe566f..e5eb3c6a 100644 --- a/scrapegraphai/integrations/burr_bridge.py +++ b/scrapegraphai/integrations/burr_bridge.py @@ -11,12 +11,13 @@ try: import burr + from burr import tracking + from burr.core import Application, ApplicationBuilder, State, Action, default, ApplicationContext + from burr.lifecycle import PostRunStepHook, PreRunStepHook except ImportError: raise ImportError("burr package is not installed. Please install it with 'pip install scrapegraphai[burr]'") -from burr import tracking -from burr.core import Application, ApplicationBuilder, State, Action, default, ApplicationContext -from burr.lifecycle import PostRunStepHook, PreRunStepHook + class PrintLnHook(PostRunStepHook, PreRunStepHook): From 050fa3faa02cb2a86ce7c0f61c99e4fa8cf3f9a5 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 27 Aug 2024 15:55:20 +0000 Subject: [PATCH 12/21] ci(release): 1.15.0-beta.6 [skip ci] ## [1.15.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.5...v1.15.0-beta.6) (2024-08-27) ### Bug Fixes * **docloaders:** BrowserBase dynamic import ([5c16ee9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c16ee985b11948c6a8c1dbfd051d458fa193973)) * **AbstractGraph:** correct and simplify instancing logic ([f73343f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f73343f19386b31878706963597c2565a023068d)) * **BurrBrige:** dynamic imports ([7789663](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7789663338a89d27fde322ae282ce07ccca16845)) * set up dynamic imports correctly ([83e71df](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/83e71df2e2cb3b6bfba11f8879d5c4917a3e1837)) ### chore * **examples:** update model names ([f6df9b7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f6df9b75125b4cacbef4af29faf3e17a13ff108c)) ### Test * **AbstractGraph:** add AbstractGraph tests ([229d74d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/229d74d4bd39befa3723fa2841e23d40007a9772)) --- CHANGELOG.md | 20 ++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f82724e0..855d9029 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,23 @@ +## [1.15.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.5...v1.15.0-beta.6) (2024-08-27) + + +### Bug Fixes + +* **docloaders:** BrowserBase dynamic import ([5c16ee9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c16ee985b11948c6a8c1dbfd051d458fa193973)) +* **AbstractGraph:** correct and simplify instancing logic ([f73343f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f73343f19386b31878706963597c2565a023068d)) +* **BurrBrige:** dynamic imports ([7789663](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7789663338a89d27fde322ae282ce07ccca16845)) +* set up dynamic imports correctly ([83e71df](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/83e71df2e2cb3b6bfba11f8879d5c4917a3e1837)) + + +### chore + +* **examples:** update model names ([f6df9b7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f6df9b75125b4cacbef4af29faf3e17a13ff108c)) + + +### Test + +* **AbstractGraph:** add AbstractGraph tests ([229d74d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/229d74d4bd39befa3723fa2841e23d40007a9772)) + ## [1.15.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.4...v1.15.0-beta.5) (2024-08-26) diff --git a/pyproject.toml b/pyproject.toml index 5afe841f..7521ce55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "scrapegraphai" -version = "1.15.0b5" +version = "1.15.0b6" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From cf73883451729b19034005ee7ebe618c1e256a11 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Tue, 27 Aug 2024 18:05:34 +0200 Subject: [PATCH 13/21] fix: bug for abstract graph --- examples/local_models/smart_scraper_ollama.py | 2 +- requirements-dev.lock | 1 - requirements.lock | 1 - scrapegraphai/graphs/abstract_graph.py | 2 +- tests/graphs/scrape_json_ollama_test.py | 50 ------------------- 5 files changed, 2 insertions(+), 54 deletions(-) delete mode 100644 tests/graphs/scrape_json_ollama_test.py diff --git a/examples/local_models/smart_scraper_ollama.py b/examples/local_models/smart_scraper_ollama.py index 3f6c0967..d5585ff7 100644 --- a/examples/local_models/smart_scraper_ollama.py +++ b/examples/local_models/smart_scraper_ollama.py @@ -9,7 +9,7 @@ graph_config = { "llm": { - "model": "ollama/mistral", + "model": "ollama/llama3.1", "temperature": 0, "format": "json", # Ollama needs the format to be specified explicitly # "base_url": "http://localhost:11434", # set ollama URL arbitrarily diff --git a/requirements-dev.lock b/requirements-dev.lock index 04ca69d9..b816db3d 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -130,7 +130,6 @@ graphviz==0.20.3 # via burr greenlet==3.0.3 # via playwright - # via sqlalchemy grpcio==1.65.4 # via google-api-core # via grpcio-status diff --git a/requirements.lock b/requirements.lock index f3cb5626..30d89366 100644 --- a/requirements.lock +++ b/requirements.lock @@ -83,7 +83,6 @@ googleapis-common-protos==1.63.2 # via grpcio-status greenlet==3.0.3 # via playwright - # via sqlalchemy grpcio==1.65.1 # via google-api-core # via grpcio-status diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 03fd30e2..58eb30f4 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -139,7 +139,7 @@ def _create_llm(self, llm_config: dict) -> object: raise ValueError(f"Provider {llm_params['model_provider']} is not supported. If possible, try to use a model instance instead.") try: - self.model_token = models_tokens[llm_params["model"]][llm_params["model"]] + self.model_token = models_tokens[llm_params["model_provider"]].get(llm_params["model"][0]) except KeyError: print("Model not found, using default token size (8192)") self.model_token = 8192 diff --git a/tests/graphs/scrape_json_ollama_test.py b/tests/graphs/scrape_json_ollama_test.py deleted file mode 100644 index 17ef80b1..00000000 --- a/tests/graphs/scrape_json_ollama_test.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -Module for scraping JSON documents -""" -import os -import json -import pytest - -from scrapegraphai.graphs import JSONScraperGraph - -# Load configuration from a JSON file -CONFIG_FILE = "config.json" -with open(CONFIG_FILE, "r") as f: - CONFIG = json.load(f) - -# Fixture to read the sample JSON file -@pytest.fixture -def sample_json(): - """ - Read the sample JSON file - """ - file_path = os.path.join(os.path.dirname(__file__), "inputs", "example.json") - with open(file_path, "r", encoding="utf-8") as file: - text = file.read() - return text - -# Parametrized fixture to load graph configurations -@pytest.fixture(params=CONFIG["graph_configs"]) -def graph_config(request): - """ - Load graph configuration - """ - return request.param - -# Test function for the scraping pipeline -def test_scraping_pipeline(sample_json, graph_config): - """ - Test the scraping pipeline - """ - expected_titles = ["Title 1", "Title 2", "Title 3"] # Replace with expected titles - - smart_scraper_graph = JSONScraperGraph( - prompt="List me all the titles", - source=sample_json, - config=graph_config - ) - result = smart_scraper_graph.run() - - assert result is not None - assert isinstance(result, list) - assert sorted(result) == sorted(expected_titles) From be3f1ec58d6354d583401f51f310f6aac987a393 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 27 Aug 2024 16:07:11 +0000 Subject: [PATCH 14/21] ci(release): 1.15.0-beta.7 [skip ci] ## [1.15.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.6...v1.15.0-beta.7) (2024-08-27) ### Bug Fixes * bug for abstract graph ([cf73883](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cf73883451729b19034005ee7ebe618c1e256a11)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 855d9029..fb10129b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.15.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.6...v1.15.0-beta.7) (2024-08-27) + + +### Bug Fixes + +* bug for abstract graph ([cf73883](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cf73883451729b19034005ee7ebe618c1e256a11)) + ## [1.15.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.5...v1.15.0-beta.6) (2024-08-27) diff --git a/pyproject.toml b/pyproject.toml index 7521ce55..91ebc501 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "scrapegraphai" -version = "1.15.0b6" +version = "1.15.0b7" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From df70b4f75d60cda2dddff3c707dc0edb34b80ee6 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Tue, 27 Aug 2024 18:08:53 +0200 Subject: [PATCH 15/21] Update abstract_graph.py --- scrapegraphai/graphs/abstract_graph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 58eb30f4..067af7d4 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -125,19 +125,19 @@ def _create_llm(self, llm_config: dict) -> object: self.model_token = llm_params["model_tokens"] except KeyError as exc: raise KeyError("model_tokens not specified") from exc - return llm_params["model_instance"] + return llm_params["model_instance"] known_providers = {"openai", "azure_openai", "google_genai", "google_vertexai", "ollama", "oneapi", "nvidia", "groq", "anthropic" "bedrock", "mistralai", "hugging_face", "deepseek", "ernie", "fireworks"} - + split_model_provider = llm_params["model"].split("/") llm_params["model_provider"] = split_model_provider[0] llm_params["model"] = split_model_provider[1:] if llm_params["model_provider"] not in known_providers: raise ValueError(f"Provider {llm_params['model_provider']} is not supported. If possible, try to use a model instance instead.") - + try: self.model_token = models_tokens[llm_params["model_provider"]].get(llm_params["model"][0]) except KeyError: From bda30a9ecbaa65668a3f21fbc9a5c50dcd21576c Mon Sep 17 00:00:00 2001 From: ajenkins Date: Wed, 28 Aug 2024 16:48:00 +1000 Subject: [PATCH 16/21] swapped failing imports (local Gemini and non-imported Ernie) for langchain chat models in graph_builder.py --- scrapegraphai/builders/graph_builder.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scrapegraphai/builders/graph_builder.py b/scrapegraphai/builders/graph_builder.py index 303f1457..69ebe492 100644 --- a/scrapegraphai/builders/graph_builder.py +++ b/scrapegraphai/builders/graph_builder.py @@ -4,10 +4,12 @@ from langchain_core.prompts import ChatPromptTemplate from langchain.chains import create_extraction_chain -from ..models import Gemini -from ..helpers import nodes_metadata, graph_schema +from langchain_community.chat_models import ErnieBotChat +from langchain_google_genai import ChatGoogleGenerativeAI from langchain_openai import ChatOpenAI +from ..helpers import nodes_metadata, graph_schema + class GraphBuilder: """ GraphBuilder is a dynamic tool for constructing web scraping graphs based on user prompts. @@ -72,9 +74,9 @@ def _create_llm(self, llm_config: dict): if "gpt-" in llm_params["model"]: return ChatOpenAI(llm_params) elif "gemini" in llm_params["model"]: - return Gemini(llm_params) + return ChatGoogleGenerativeAI(llm_params) elif "ernie" in llm_params["model"]: - return Ernie(llm_params) + return ErnieBotChat(llm_params) raise ValueError("Model not supported") def _generate_nodes_description(self): From 4f120e29c546373a2cc06c102cc9886cc5270c06 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Wed, 28 Aug 2024 12:36:03 +0200 Subject: [PATCH 17/21] fix(AbstractGraph): model selection bug --- scrapegraphai/graphs/abstract_graph.py | 19 +++--- tests/graphs/abstract_graph_test.py | 83 +++++++++++++++++++++----- 2 files changed, 78 insertions(+), 24 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 067af7d4..0d02b6d4 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -131,15 +131,15 @@ def _create_llm(self, llm_config: dict) -> object: "ollama", "oneapi", "nvidia", "groq", "anthropic" "bedrock", "mistralai", "hugging_face", "deepseek", "ernie", "fireworks"} - split_model_provider = llm_params["model"].split("/") + split_model_provider = llm_params["model"].split("/", 1) llm_params["model_provider"] = split_model_provider[0] - llm_params["model"] = split_model_provider[1:] + llm_params["model"] = split_model_provider[1] if llm_params["model_provider"] not in known_providers: raise ValueError(f"Provider {llm_params['model_provider']} is not supported. If possible, try to use a model instance instead.") try: - self.model_token = models_tokens[llm_params["model_provider"]].get(llm_params["model"][0]) + self.model_token = models_tokens[llm_params["model_provider"]][llm_params["model"]] except KeyError: print("Model not found, using default token size (8192)") self.model_token = 8192 @@ -150,18 +150,21 @@ def _create_llm(self, llm_config: dict) -> object: warnings.simplefilter("ignore") return init_chat_model(**llm_params) else: - if "deepseek" in llm_params["model"]: + if llm_params["model_provider"] == "deepseek": return DeepSeek(**llm_params) - if "ernie" in llm_params["model"]: + if llm_params["model_provider"] == "ernie": from langchain_community.chat_models import ErnieBotChat return ErnieBotChat(**llm_params) - if "oneapi" in llm_params["model"]: + if llm_params["model_provider"] == "oneapi": return OneApi(**llm_params) - if "nvidia" in llm_params["model"]: - from langchain_nvidia_ai_endpoints import ChatNVIDIA + if llm_params["model_provider"] == "nvidia": + try: + from langchain_nvidia_ai_endpoints import ChatNVIDIA + except ImportError: + raise ImportError("The langchain_nvidia_ai_endpoints module is not installed. Please install it using `pip install langchain_nvidia_ai_endpoints`.") return ChatNVIDIA(**llm_params) except Exception as e: diff --git a/tests/graphs/abstract_graph_test.py b/tests/graphs/abstract_graph_test.py index 805a1691..f52c9b32 100644 --- a/tests/graphs/abstract_graph_test.py +++ b/tests/graphs/abstract_graph_test.py @@ -3,29 +3,80 @@ """ import pytest from unittest.mock import patch -from scrapegraphai.graphs import AbstractGraph +from scrapegraphai.graphs import AbstractGraph, BaseGraph +from scrapegraphai.nodes import ( + FetchNode, + ParseNode +) +from scrapegraphai.models import OneApi, DeepSeek +from langchain_openai import ChatOpenAI, AzureChatOpenAI +from langchain_community.chat_models import ChatOllama +from langchain_google_genai import ChatGoogleGenerativeAI + + + +class TestGraph(AbstractGraph): + def __init__(self, prompt: str, config: dict): + super().__init__(prompt, config) + + def _create_graph(self) -> BaseGraph: + fetch_node = FetchNode( + input="url| local_dir", + output=["doc", "link_urls", "img_urls"], + node_config={ + "llm_model": self.llm_model, + "force": self.config.get("force", False), + "cut": self.config.get("cut", True), + "loader_kwargs": self.config.get("loader_kwargs", {}), + "browser_base": self.config.get("browser_base") + } + ) + parse_node = ParseNode( + input="doc", + output=["parsed_doc"], + node_config={ + "chunk_size": self.model_token + } + ) + return BaseGraph( + nodes=[ + fetch_node, + parse_node + ], + edges=[ + (fetch_node, parse_node), + ], + entry_point=fetch_node, + graph_name=self.__class__.__name__ + ) + + def run(self) -> str: + inputs = {"user_prompt": self.prompt, self.input_key: self.source} + self.final_state, self.execution_info = self.graph.execute(inputs) + + return self.final_state.get("answer", "No answer found.") + class TestAbstractGraph: @pytest.mark.parametrize("llm_config, expected_model", [ - ({"model": "openai/gpt-3.5-turbo"}, "ChatOpenAI"), - ({"model": "azure_openai/gpt-3.5-turbo"}, "AzureChatOpenAI"), - ({"model": "google_genai/gemini-pro"}, "ChatGoogleGenerativeAI"), - ({"model": "google_vertexai/chat-bison"}, "ChatVertexAI"), - ({"model": "ollama/llama2"}, "Ollama"), - ({"model": "oneapi/text-davinci-003"}, "OneApi"), - ({"model": "nvidia/clara-instant-1-base"}, "ChatNVIDIA"), - ({"model": "deepseek/deepseek-coder-6.7b-instruct"}, "DeepSeek"), - ({"model": "ernie/ernie-bot"}, "ErnieBotChat"), + ({"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-randomtest001"}, ChatOpenAI), + ({ + "model": "azure_openai/gpt-3.5-turbo", + "api_key": "random-api-key", + "api_version": "no version", + "azure_endpoint": "https://www.example.com/"}, + AzureChatOpenAI), + ({"model": "google_genai/gemini-pro", "google_api_key": "google-key-test"}, ChatGoogleGenerativeAI), + ({"model": "ollama/llama2"}, ChatOllama), + ({"model": "oneapi/qwen-turbo"}, OneApi), + ({"model": "deepseek/deepseek-coder"}, DeepSeek), ]) + def test_create_llm(self, llm_config, expected_model): - graph = AbstractGraph("Test prompt", {"llm": llm_config}) + graph = TestGraph("Test prompt", {"llm": llm_config}) assert isinstance(graph.llm_model, expected_model) def test_create_llm_unknown_provider(self): with pytest.raises(ValueError): - AbstractGraph("Test prompt", {"llm": {"model": "unknown_provider/model"}}) + TestGraph("Test prompt", {"llm": {"model": "unknown_provider/model"}}) - def test_create_llm_error(self): - with patch("your_module.init_chat_model", side_effect=Exception("Test error")): - with pytest.raises(Exception): - AbstractGraph("Test prompt", {"llm": {"model": "openai/gpt-3.5-turbo"}}) From f7a85c266ae758cc16297ebc5d98f8919a80c523 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Wed, 28 Aug 2024 12:51:50 +0200 Subject: [PATCH 18/21] fix(models): better DeepSeek and OneApi integration --- examples/deepseek/csv_scraper_deepseek.py | 3 +-- examples/deepseek/csv_scraper_graph_multi_deepseek.py | 3 +-- examples/deepseek/json_scraper_deepseek.py | 3 +-- examples/deepseek/json_scraper_multi_deepseek.py | 3 +-- examples/deepseek/pdf_scraper_graph_deepseek.py | 3 +-- examples/deepseek/pdf_scraper_multi_deepseek.py | 3 +-- examples/deepseek/scrape_plain_text_deepseek.py | 3 +-- examples/deepseek/script_generator_deepseek.py | 3 +-- examples/deepseek/script_multi_generator_deepseek.py | 3 +-- examples/deepseek/search_graph_deepseek.py | 3 +-- examples/deepseek/search_graph_schema_deepseek.py | 3 +-- examples/deepseek/search_link_graph_deepseek.py | 3 +-- examples/deepseek/smart_scraper_deepseek.py | 3 +-- examples/deepseek/smart_scraper_multi_deepseek.py | 3 +-- examples/deepseek/smart_scraper_schema_deepseek.py | 3 +-- examples/deepseek/xml_scraper_deepseek.py | 3 +-- examples/deepseek/xml_scraper_graph_multi_deepseek.py | 3 +-- examples/oneapi/custom_graph_oneapi.py | 2 +- scrapegraphai/models/deepseek.py | 6 +++++- scrapegraphai/models/oneapi.py | 4 +++- tests/graphs/abstract_graph_test.py | 4 ++-- 21 files changed, 28 insertions(+), 39 deletions(-) diff --git a/examples/deepseek/csv_scraper_deepseek.py b/examples/deepseek/csv_scraper_deepseek.py index 60b1c394..26ff26ee 100644 --- a/examples/deepseek/csv_scraper_deepseek.py +++ b/examples/deepseek/csv_scraper_deepseek.py @@ -28,8 +28,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/csv_scraper_graph_multi_deepseek.py b/examples/deepseek/csv_scraper_graph_multi_deepseek.py index 0a08f83f..88056648 100644 --- a/examples/deepseek/csv_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/csv_scraper_graph_multi_deepseek.py @@ -28,8 +28,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/json_scraper_deepseek.py b/examples/deepseek/json_scraper_deepseek.py index 02991c0d..5d8bf152 100644 --- a/examples/deepseek/json_scraper_deepseek.py +++ b/examples/deepseek/json_scraper_deepseek.py @@ -27,8 +27,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/json_scraper_multi_deepseek.py b/examples/deepseek/json_scraper_multi_deepseek.py index 4f9ca32d..893937cd 100644 --- a/examples/deepseek/json_scraper_multi_deepseek.py +++ b/examples/deepseek/json_scraper_multi_deepseek.py @@ -13,8 +13,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/pdf_scraper_graph_deepseek.py b/examples/deepseek/pdf_scraper_graph_deepseek.py index c9c5e0b2..990e7369 100644 --- a/examples/deepseek/pdf_scraper_graph_deepseek.py +++ b/examples/deepseek/pdf_scraper_graph_deepseek.py @@ -18,8 +18,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/pdf_scraper_multi_deepseek.py b/examples/deepseek/pdf_scraper_multi_deepseek.py index e43dd10a..59727a62 100644 --- a/examples/deepseek/pdf_scraper_multi_deepseek.py +++ b/examples/deepseek/pdf_scraper_multi_deepseek.py @@ -13,8 +13,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/scrape_plain_text_deepseek.py b/examples/deepseek/scrape_plain_text_deepseek.py index a7834a8f..52128737 100644 --- a/examples/deepseek/scrape_plain_text_deepseek.py +++ b/examples/deepseek/scrape_plain_text_deepseek.py @@ -29,8 +29,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/script_generator_deepseek.py b/examples/deepseek/script_generator_deepseek.py index 3de06f25..eaec5232 100644 --- a/examples/deepseek/script_generator_deepseek.py +++ b/examples/deepseek/script_generator_deepseek.py @@ -18,8 +18,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "library": "beautifulsoup" } diff --git a/examples/deepseek/script_multi_generator_deepseek.py b/examples/deepseek/script_multi_generator_deepseek.py index cc577ecd..150298ed 100644 --- a/examples/deepseek/script_multi_generator_deepseek.py +++ b/examples/deepseek/script_multi_generator_deepseek.py @@ -18,8 +18,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "library": "beautifulsoup" } diff --git a/examples/deepseek/search_graph_deepseek.py b/examples/deepseek/search_graph_deepseek.py index 54d2e9fa..e7c2483c 100644 --- a/examples/deepseek/search_graph_deepseek.py +++ b/examples/deepseek/search_graph_deepseek.py @@ -16,8 +16,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "max_results": 2, "verbose": True, diff --git a/examples/deepseek/search_graph_schema_deepseek.py b/examples/deepseek/search_graph_schema_deepseek.py index bcebe76d..1471ede1 100644 --- a/examples/deepseek/search_graph_schema_deepseek.py +++ b/examples/deepseek/search_graph_schema_deepseek.py @@ -32,8 +32,7 @@ class Dishes(BaseModel): graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/search_link_graph_deepseek.py b/examples/deepseek/search_link_graph_deepseek.py index 96f886a9..dac13737 100644 --- a/examples/deepseek/search_link_graph_deepseek.py +++ b/examples/deepseek/search_link_graph_deepseek.py @@ -17,8 +17,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/smart_scraper_deepseek.py b/examples/deepseek/smart_scraper_deepseek.py index 50314819..4c49b160 100644 --- a/examples/deepseek/smart_scraper_deepseek.py +++ b/examples/deepseek/smart_scraper_deepseek.py @@ -19,8 +19,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/smart_scraper_multi_deepseek.py b/examples/deepseek/smart_scraper_multi_deepseek.py index 374cc6e2..2ef062de 100644 --- a/examples/deepseek/smart_scraper_multi_deepseek.py +++ b/examples/deepseek/smart_scraper_multi_deepseek.py @@ -17,8 +17,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/smart_scraper_schema_deepseek.py b/examples/deepseek/smart_scraper_schema_deepseek.py index 6d164eb1..722e02bf 100644 --- a/examples/deepseek/smart_scraper_schema_deepseek.py +++ b/examples/deepseek/smart_scraper_schema_deepseek.py @@ -31,8 +31,7 @@ class Projects(BaseModel): graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/xml_scraper_deepseek.py b/examples/deepseek/xml_scraper_deepseek.py index d69665f4..02178c4b 100644 --- a/examples/deepseek/xml_scraper_deepseek.py +++ b/examples/deepseek/xml_scraper_deepseek.py @@ -29,8 +29,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/deepseek/xml_scraper_graph_multi_deepseek.py b/examples/deepseek/xml_scraper_graph_multi_deepseek.py index 5098c9fd..ae74ba21 100644 --- a/examples/deepseek/xml_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/xml_scraper_graph_multi_deepseek.py @@ -28,8 +28,7 @@ graph_config = { "llm": { "model": "deepseek/deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', + "api_key": deepseek_key, }, "verbose": True, } diff --git a/examples/oneapi/custom_graph_oneapi.py b/examples/oneapi/custom_graph_oneapi.py index 5777ab33..be58d1d1 100644 --- a/examples/oneapi/custom_graph_oneapi.py +++ b/examples/oneapi/custom_graph_oneapi.py @@ -22,7 +22,7 @@ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) +llm_model = ChatOpenAI(graph_config["llm"]) embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) # define the nodes for the graph diff --git a/scrapegraphai/models/deepseek.py b/scrapegraphai/models/deepseek.py index 523fe667..31b2bd5d 100644 --- a/scrapegraphai/models/deepseek.py +++ b/scrapegraphai/models/deepseek.py @@ -14,5 +14,9 @@ class DeepSeek(ChatOpenAI): llm_config (dict): Configuration parameters for the language model. """ - def __init__(self, llm_config: dict): + def __init__(self, **llm_config): + if 'api_key' in llm_config: + llm_config['openai_api_key'] = llm_config.pop('api_key') + llm_config['openai_api_base'] = 'https://api.deepseek.com/v1' + super().__init__(**llm_config) diff --git a/scrapegraphai/models/oneapi.py b/scrapegraphai/models/oneapi.py index 54e846d9..9b20621b 100644 --- a/scrapegraphai/models/oneapi.py +++ b/scrapegraphai/models/oneapi.py @@ -13,5 +13,7 @@ class OneApi(ChatOpenAI): llm_config (dict): Configuration parameters for the language model. """ - def __init__(self, llm_config: dict): + def __init__(self, **llm_config): + if 'api_key' in llm_config: + llm_config['openai_api_key'] = llm_config.pop('api_key') super().__init__(**llm_config) diff --git a/tests/graphs/abstract_graph_test.py b/tests/graphs/abstract_graph_test.py index f52c9b32..60c8ab4c 100644 --- a/tests/graphs/abstract_graph_test.py +++ b/tests/graphs/abstract_graph_test.py @@ -68,8 +68,8 @@ class TestAbstractGraph: AzureChatOpenAI), ({"model": "google_genai/gemini-pro", "google_api_key": "google-key-test"}, ChatGoogleGenerativeAI), ({"model": "ollama/llama2"}, ChatOllama), - ({"model": "oneapi/qwen-turbo"}, OneApi), - ({"model": "deepseek/deepseek-coder"}, DeepSeek), + ({"model": "oneapi/qwen-turbo", "api_key": "oneapi-api-key"}, OneApi), + ({"model": "deepseek/deepseek-coder", "api_key": "deepseek-api-key"}, DeepSeek), ]) def test_create_llm(self, llm_config, expected_model): From dbec55064feac8dfe01290bf82b5b47b013b589d Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 28 Aug 2024 14:12:05 +0000 Subject: [PATCH 19/21] ci(release): 1.15.0-beta.8 [skip ci] ## [1.15.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.7...v1.15.0-beta.8) (2024-08-28) ### Bug Fixes * **models:** better DeepSeek and OneApi integration ([f7a85c2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f7a85c266ae758cc16297ebc5d98f8919a80c523)) * **AbstractGraph:** model selection bug ([4f120e2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4f120e29c546373a2cc06c102cc9886cc5270c06)) --- CHANGELOG.md | 8 ++++++++ pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb10129b..af26c607 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## [1.15.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.7...v1.15.0-beta.8) (2024-08-28) + + +### Bug Fixes + +* **models:** better DeepSeek and OneApi integration ([f7a85c2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f7a85c266ae758cc16297ebc5d98f8919a80c523)) +* **AbstractGraph:** model selection bug ([4f120e2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4f120e29c546373a2cc06c102cc9886cc5270c06)) + ## [1.15.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.6...v1.15.0-beta.7) (2024-08-27) diff --git a/pyproject.toml b/pyproject.toml index 91ebc501..0c3d26aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "scrapegraphai" -version = "1.15.0b7" +version = "1.15.0b8" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From d29f7471514a21c05161ccece117d95d8aef560b Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 28 Aug 2024 20:19:06 +0200 Subject: [PATCH 20/21] Update script_generator_openai.py --- examples/openai/script_generator_openai.py | 29 +++++++++++----------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/examples/openai/script_generator_openai.py b/examples/openai/script_generator_openai.py index e67ad52b..119f67e5 100644 --- a/examples/openai/script_generator_openai.py +++ b/examples/openai/script_generator_openai.py @@ -1,10 +1,11 @@ """ -Basic example of scraping pipeline using ScriptCreatorGraph +Basic example of scraping pipeline using SmartScraper """ import os +import json from dotenv import load_dotenv -from scrapegraphai.graphs import ScriptCreatorGraph +from scrapegraphai.graphs import SmartScraperGraph from scrapegraphai.utils import prettify_exec_info load_dotenv() @@ -13,34 +14,32 @@ # Define the configuration for the graph # ************************************************ -openai_key = os.getenv("OPENAI_APIKEY") graph_config = { "llm": { - "api_key": openai_key, - "model": "openai/gpt-4o", + "api_key": os.getenv("OPENAI_API_KEY"), + "model": "gpt-4o", }, - "library": "beautifulsoup" + "verbose": True, + "headless": False, } # ************************************************ -# Create the ScriptCreatorGraph instance and run it +# Create the SmartScraperGraph instance and run it # ************************************************ -script_creator_graph = ScriptCreatorGraph( - prompt="List me all the projects with their description.", - # also accepts a string with the already downloaded HTML code - source="https://perinim.github.io/projects", +smart_scraper_graph = SmartScraperGraph( + prompt="List me what does the company do, the name and a contact email.", + source="https://scrapegraphai.com/", config=graph_config ) -result = script_creator_graph.run() -print(result) +result = smart_scraper_graph.run() +print(json.dumps(result, indent=4)) # ************************************************ # Get graph execution info # ************************************************ -graph_exec_info = script_creator_graph.get_execution_info() +graph_exec_info = smart_scraper_graph.get_execution_info() print(prettify_exec_info(graph_exec_info)) - From 8f38a6bf15c2138471d7bdb9e0236f02389d93bb Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 28 Aug 2024 19:31:07 +0000 Subject: [PATCH 21/21] ci(release): 1.15.1-beta.1 [skip ci] ## [1.15.1-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0...v1.15.1-beta.1) (2024-08-28) ### Bug Fixes * abstract graph local model ([04128e7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/04128e7e9f585aaf774fabf646c4d9d3b96b8333)) * **models:** better DeepSeek and OneApi integration ([f7a85c2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f7a85c266ae758cc16297ebc5d98f8919a80c523)) * **docloaders:** BrowserBase dynamic import ([5c16ee9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c16ee985b11948c6a8c1dbfd051d458fa193973)) * bug for abstract graph ([cf73883](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cf73883451729b19034005ee7ebe618c1e256a11)) * **AbstractGraph:** correct and simplify instancing logic ([f73343f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f73343f19386b31878706963597c2565a023068d)) * **BurrBrige:** dynamic imports ([7789663](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7789663338a89d27fde322ae282ce07ccca16845)) * **AbstractGraph:** model selection bug ([4f120e2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4f120e29c546373a2cc06c102cc9886cc5270c06)) * set up dynamic imports correctly ([83e71df](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/83e71df2e2cb3b6bfba11f8879d5c4917a3e1837)) ### chore * **examples:** update model names ([f6df9b7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f6df9b75125b4cacbef4af29faf3e17a13ff108c)) * update README.md ([5f562b8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5f562b89bd63eba1300afe98572f152a0621b370)) ### Test * **AbstractGraph:** add AbstractGraph tests ([229d74d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/229d74d4bd39befa3723fa2841e23d40007a9772)) ### CI * **release:** 1.15.0-beta.4 [skip ci] ([c1ce9c6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c1ce9c69d4ba746d488891d18fa64460e76124bf)) * **release:** 1.15.0-beta.5 [skip ci] ([22ab45f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/22ab45f6bda3a12ab01c743fd124448a2e26cd46)) * **release:** 1.15.0-beta.6 [skip ci] ([050fa3f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/050fa3faa02cb2a86ce7c0f61c99e4fa8cf3f9a5)) * **release:** 1.15.0-beta.7 [skip ci] ([be3f1ec](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/be3f1ec58d6354d583401f51f310f6aac987a393)) * **release:** 1.15.0-beta.8 [skip ci] ([dbec550](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/dbec55064feac8dfe01290bf82b5b47b013b589d)) --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39dd8a82..632ca55b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ +## [1.15.1-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0...v1.15.1-beta.1) (2024-08-28) +### Bug Fixes + +* abstract graph local model ([04128e7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/04128e7e9f585aaf774fabf646c4d9d3b96b8333)) +* **models:** better DeepSeek and OneApi integration ([f7a85c2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f7a85c266ae758cc16297ebc5d98f8919a80c523)) +* **docloaders:** BrowserBase dynamic import ([5c16ee9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c16ee985b11948c6a8c1dbfd051d458fa193973)) +* bug for abstract graph ([cf73883](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cf73883451729b19034005ee7ebe618c1e256a11)) +* **AbstractGraph:** correct and simplify instancing logic ([f73343f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f73343f19386b31878706963597c2565a023068d)) +* **BurrBrige:** dynamic imports ([7789663](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7789663338a89d27fde322ae282ce07ccca16845)) +* **AbstractGraph:** model selection bug ([4f120e2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4f120e29c546373a2cc06c102cc9886cc5270c06)) +* set up dynamic imports correctly ([83e71df](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/83e71df2e2cb3b6bfba11f8879d5c4917a3e1837)) + + +### chore + +* **examples:** update model names ([f6df9b7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f6df9b75125b4cacbef4af29faf3e17a13ff108c)) +* update README.md ([5f562b8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5f562b89bd63eba1300afe98572f152a0621b370)) + + +### Test + +* **AbstractGraph:** add AbstractGraph tests ([229d74d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/229d74d4bd39befa3723fa2841e23d40007a9772)) + + +### CI + +* **release:** 1.15.0-beta.4 [skip ci] ([c1ce9c6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c1ce9c69d4ba746d488891d18fa64460e76124bf)) +* **release:** 1.15.0-beta.5 [skip ci] ([22ab45f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/22ab45f6bda3a12ab01c743fd124448a2e26cd46)) +* **release:** 1.15.0-beta.6 [skip ci] ([050fa3f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/050fa3faa02cb2a86ce7c0f61c99e4fa8cf3f9a5)) +* **release:** 1.15.0-beta.7 [skip ci] ([be3f1ec](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/be3f1ec58d6354d583401f51f310f6aac987a393)) +* **release:** 1.15.0-beta.8 [skip ci] ([dbec550](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/dbec55064feac8dfe01290bf82b5b47b013b589d)) + ## [1.15.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.7...v1.15.0-beta.8) (2024-08-28) diff --git a/pyproject.toml b/pyproject.toml index a43694ed..ec0b46b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "scrapegraphai" -version = "1.15.0b8" +version = "1.15.1b1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [