From 5eed0a7654e6d08fceaa3e7c39928ca1e61ea60c Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Sat, 17 Aug 2024 10:33:25 +0200 Subject: [PATCH] fix(FetchNode) add OpenAI optimization to GPT models on Azure closes #547 --- scrapegraphai/nodes/fetch_node.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index a0514f37..a47b3b19 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -4,7 +4,7 @@ import json from typing import List, Optional -from langchain_openai import ChatOpenAI +from langchain_openai import ChatOpenAI, AzureChatOpenAI import pandas as pd import requests from langchain_community.document_loaders import PyPDFLoader @@ -221,7 +221,7 @@ def handle_local_source(self, state, source): parsed_content = source - if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator: + if (isinstance(self.llm_model, ChatOpenAI) or isinstance(self.llm_model, AzureChatOpenAI)) and not self.script_creator or self.force and not self.script_creator: parsed_content = convert_to_md(source) else: parsed_content = source @@ -258,7 +258,7 @@ def handle_web_source(self, state, source): if not self.cut: parsed_content = cleanup_html(response, source) - if (isinstance(self.llm_model, ChatOpenAI) + if ((isinstance(self.llm_model, ChatOpenAI) or isinstance(self.llm_model, AzureChatOpenAI)) and not self.script_creator) or (self.force and not self.script_creator): parsed_content = convert_to_md(source, parsed_content) @@ -287,7 +287,7 @@ def handle_web_source(self, state, source): raise ValueError("No HTML body content found in the document fetched by ChromiumLoader.") parsed_content = document[0].page_content - if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled: + if (isinstance(self.llm_model, ChatOpenAI) or isinstance(self.llm_model, AzureChatOpenAI)) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled: parsed_content = convert_to_md(document[0].page_content, parsed_content) compressed_document = [