diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index a0514f37..a47b3b19 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -4,7 +4,7 @@ import json from typing import List, Optional -from langchain_openai import ChatOpenAI +from langchain_openai import ChatOpenAI, AzureChatOpenAI import pandas as pd import requests from langchain_community.document_loaders import PyPDFLoader @@ -221,7 +221,7 @@ def handle_local_source(self, state, source): parsed_content = source - if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator: + if (isinstance(self.llm_model, ChatOpenAI) or isinstance(self.llm_model, AzureChatOpenAI)) and not self.script_creator or self.force and not self.script_creator: parsed_content = convert_to_md(source) else: parsed_content = source @@ -258,7 +258,7 @@ def handle_web_source(self, state, source): if not self.cut: parsed_content = cleanup_html(response, source) - if (isinstance(self.llm_model, ChatOpenAI) + if ((isinstance(self.llm_model, ChatOpenAI) or isinstance(self.llm_model, AzureChatOpenAI)) and not self.script_creator) or (self.force and not self.script_creator): parsed_content = convert_to_md(source, parsed_content) @@ -287,7 +287,7 @@ def handle_web_source(self, state, source): raise ValueError("No HTML body content found in the document fetched by ChromiumLoader.") parsed_content = document[0].page_content - if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled: + if (isinstance(self.llm_model, ChatOpenAI) or isinstance(self.llm_model, AzureChatOpenAI)) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled: parsed_content = convert_to_md(document[0].page_content, parsed_content) compressed_document = [