UnstructuredWordDocumentLoader to load .doc file error #29778
Unanswered
LiuChao888
asked this question in
Q&A
Replies: 0 comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
Checked other resources
Commit to Help
Example Code
Description
This happens with my code, for example.
When I run this code, it raises an error (full traceback below):
Please help me resolve it. If possible, share some working code. Thank you!
Traceback (most recent call last):
File "/home/jh/liuchao_project/agents_langchan_ollama/langchian_RAG_FAISS.py", line 214, in <module>
main()
File "/home/jh/liuchao_project/agents_langchan_ollama/langchian_RAG_FAISS.py", line 171, in main
faiss_rag.create_vectorstore(file_path_or_dir=DOCS_DIR)
File "/home/jh/liuchao_project/agents_langchan_ollama/langchian_RAG_FAISS.py", line 79, in create_vectorstore
docs, dir_path = self.load_and_split_documents(file_path_or_dir)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/liuchao_project/agents_langchan_ollama/langchian_RAG_FAISS.py", line 60, in load_and_split_documents
documents.extend(loader.load())
^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/langchain_core/document_loaders/base.py", line 31, in load
return list(self.lazy_load())
^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/langchain_community/document_loaders/unstructured.py", line 107, in lazy_load
elements = self._get_elements()
^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/langchain_community/document_loaders/unstructured.py", line 228, in _get_elements
return partition(filename=self.file_path, **self.unstructured_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/unstructured/partition/auto.py", line 287, in partition
partition = partitioner_loader.get(file_type)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/unstructured/partition/auto.py", line 360, in get
self._partitioners[file_type] = self._load_partitioner(file_type)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/unstructured/partition/auto.py", line 378, in _load_partitioner
partitioner_module = importlib.import_module(file_type.partitioner_module_qname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
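File "<frozen importlib._bootstrap>", line 1204, in _gcd_import
File "<frozen importlib._bootstrap>", line 1176, in _find_and_load
File "<frozen importlib._bootstrap>", line 1147, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 940, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed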
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/unstructured/partition/doc.py", line 11, in <module>
from unstructured.partition.docx import partition_docx
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/unstructured/partition/docx.py", line 47, in <module>
from unstructured.partition.text_type import (
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/unstructured/partition/text_type.py", line 20, in <module>
from unstructured.nlp.tokenize import pos_tag, sent_tokenize, word_tokenize
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/unstructured/nlp/tokenize.py", line 48, in <module>
download_nltk_packages()
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/unstructured/nlp/tokenize.py", line 37, in download_nltk_packages
tokenizer_available = check_for_nltk_package(
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/unstructured/nlp/tokenize.py", line 24, in check_for_nltk_package
nltk.find(f"{package_category}/{package_name}", paths=paths)
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/nltk/data.py", line 551, in find
return find(modified_name, paths)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/nltk/data.py", line 538, in find
return ZipFilePathPointer(p, zipentry)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/nltk/data.py", line 391, in __init__
zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/site-packages/nltk/data.py", line 1020, in __init__
zipfile.ZipFile.__init__(self, filename)
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/zipfile.py", line 1312, in __init__
self._RealGetContents()
File "/home/jh/anaconda3/envs/liuchao/lib/python3.11/zipfile.py", line 1379, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
System Info
python=3.11
langchain=0.3.18
Beta Was this translation helpful? Give feedback.
All reactions