1313# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
1414import os
1515import warnings
16- from typing import IO , Any , Dict , List , Optional , Union
16+ from io import IOBase
17+ from typing import Any , Dict , List , Optional , Union
1718from urllib .parse import urlparse
1819
1920from camel .embeddings import BaseEmbedding , OpenAIEmbedding
@@ -72,7 +73,7 @@ def __init__(
7273
7374 def process (
7475 self ,
75- content : Union [str , Element , IO [ bytes ] ],
76+ content : Union [str , Element , IOBase ],
7677 chunk_type : str = "chunk_by_title" ,
7778 max_characters : int = 500 ,
7879 embed_batch : int = 50 ,
@@ -85,7 +86,7 @@ def process(
8586 specified vector storage.
8687
8788 Args:
88- content (Union[str, Element, IO[bytes] ]): Local file path, remote
89+ content (Union[str, Element, IOBase ]): Local file path, remote
8990 URL, string content, Element object, or a binary file object.
9091 chunk_type (str): Type of chunking going to apply. Defaults to
9192 "chunk_by_title".
@@ -98,7 +99,7 @@ def process(
9899 """
99100 if isinstance (content , Element ):
100101 elements = [content ]
101- elif isinstance (content , IO ):
102+ elif isinstance (content , IOBase ):
102103 elements = self .uio .parse_bytes (file = content , ** kwargs ) or []
103104 else :
104105 # Check if the content is URL
@@ -138,6 +139,8 @@ def process(
138139 for vector , chunk in zip (batch_vectors , batch_chunks ):
139140 if isinstance (content , str ):
140141 content_path_info = {"content path" : content }
142+ elif isinstance (content , IOBase ):
143+ content_path_info = {"content path" : "From file bytes" }
141144 elif isinstance (content , Element ):
142145 content_path_info = {
143146 "content path" : content .metadata .file_directory
0 commit comments