Skip to content

Commit 5846fc5

Browse files
Added `encoder` parameter to the `whisper` function in LLMWhispererClient
1 parent a7a58d4 commit 5846fc5

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

src/unstract/llmwhisperer/client.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ def whisper(
169169
ocr_provider: str = "advanced",
170170
line_splitter_tolerance: float = 0.4,
171171
horizontal_stretch_factor: float = 1.0,
172+
encoder = "ISO-8859-1"
172173
) -> dict:
173174
"""
174175
Sends a request to the LLMWhisperer API to process a document.
@@ -190,6 +191,7 @@ def whisper(
190191
ocr_provider (str, optional): The OCR provider. Can be "advanced" or "basic". Defaults to "advanced".
191192
line_splitter_tolerance (float, optional): The line splitter tolerance. Defaults to 0.4.
192193
horizontal_stretch_factor (float, optional): The horizontal stretch factor. Defaults to 1.0.
194+
encoder (str): The character encoding to use for processing the text. Defaults to "ISO-8859-1".
193195
194196
Returns:
195197
dict: The response from the API as a dictionary.
@@ -268,6 +270,7 @@ def generate():
268270
prepared = req.prepare()
269271
s = requests.Session()
270272
response = s.send(prepared, timeout=self.api_timeout, stream=should_stream)
273+
response.encoding = encoder
271274
if response.status_code != 200 and response.status_code != 202:
272275
message = json.loads(response.text)
273276
message["status_code"] = response.status_code

0 commit comments

Comments
 (0)