Skip to content

Commit 1d41f6e

Browse files
authored
fix: md conversion
1 parent a711186 commit 1d41f6e

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

scrapegraphai/nodes/fetch_node.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -163,10 +163,10 @@ def execute(self, state):
163163
if not source.strip():
164164
raise ValueError("No HTML body content found in the local source.")
165165

166-
parsed_content = source
167-
168-
if isinstance(self.llm_model, OpenAI) and not self.script_creator or self.force and not self.script_creator:
166+
if (not self.script_creator) or (self.force and not self.script_creator):
169167
parsed_content = convert_to_md(source)
168+
else:
169+
parsed_content = source
170170

171171
compressed_document = [
172172
Document(page_content=parsed_content, metadata={"source": "local_dir"})
@@ -184,8 +184,8 @@ def execute(self, state):
184184
if not self.cut:
185185
parsed_content = cleanup_html(response, source)
186186

187-
if (isinstance(self.llm_model, OpenAI) and not self.script_creator) or (self.force and not self.script_creator):
188-
parsed_content = convert_to_md(source, input_data[0])
187+
if (not self.script_creator) or (self.force and not self.script_creator):
188+
parsed_content = convert_to_md(parsed_content, source)
189189
compressed_document = [Document(page_content=parsed_content)]
190190
else:
191191
self.logger.warning(
@@ -206,9 +206,9 @@ def execute(self, state):
206206
raise ValueError("No HTML body content found in the document fetched by ChromiumLoader.")
207207
parsed_content = document[0].page_content
208208

209-
if isinstance(self.llm_model, OpenAI) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled:
209+
if (not self.script_creator) or (self.force and not self.script_creator and not self.openai_md_enabled):
210210

211-
parsed_content = convert_to_md(document[0].page_content, input_data[0])
211+
parsed_content = convert_to_md(document[0].page_content, source)
212212

213213

214214
compressed_document = [

scrapegraphai/utils/convert_to_md.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def convert_to_md(html: str, url: str = None) -> str:
2323
h = html2text.HTML2Text()
2424
h.ignore_links = False
2525
h.body_width = 0
26-
if url:
26+
if url is not None:
2727
parsed_url = urlparse(url)
2828
domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
2929
h.baseurl = domain

0 commit comments

Comments
 (0)