From ce2b4b0872c7055d655056501dfa157cb8058f3a Mon Sep 17 00:00:00 2001 From: Abinash Date: Thu, 26 Jun 2025 20:56:29 +0530 Subject: [PATCH] Update export_task.py Explicitly use UTF-8 encoding when writing to and reading back the temporary buffer file. This is necessary because data exported from Labelbox can contain Chinese/Japanese characters; without an explicit encoding, Python falls back to the platform's default encoding, which can raise a UnicodeEncodeError. --- libs/labelbox/src/labelbox/schema/export_task.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/export_task.py b/libs/labelbox/src/labelbox/schema/export_task.py index 2e206d433..0e36fe6ab 100644 --- a/libs/labelbox/src/labelbox/schema/export_task.py +++ b/libs/labelbox/src/labelbox/schema/export_task.py @@ -263,7 +263,7 @@ def read(self) -> Iterator[Tuple[_MetadataFileInfo, str]]: if not self._retrieval_strategy: raise ValueError("retrieval strategy not set") # create a buffer - with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file: + with tempfile.NamedTemporaryFile(mode="w+", delete=False, encoding="utf-8") as temp_file: result = self._retrieval_strategy.get_next_chunk() while result: _, raw_data = result @@ -275,7 +275,7 @@ def read(self) -> Iterator[Tuple[_MetadataFileInfo, str]]: temp_file.write(raw_data) result = self._retrieval_strategy.get_next_chunk() # read buffer - with open(temp_file.name, "r") as temp_file_reopened: + with open(temp_file.name, "r", encoding="utf-8") as temp_file_reopened: for idx, line in enumerate(temp_file_reopened): yield ( _MetadataFileInfo(