camel-ai
diff --git a/‎.github/ISSUE_TEMPLATE/bug_report.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/ISSUE_TEMPLATE/bug_report.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎camel/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎camel/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎camel/memories/records.py‎
Lines changed: 36 additions & 0 deletions b/‎camel/memories/records.py‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎camel/messages/base.py‎
Lines changed: 126 additions & 33 deletions b/‎camel/messages/base.py‎
Lines changed: 126 additions & 33 deletions
diff --git a/‎camel/tasks/task.py‎
Lines changed: 4 additions & 3 deletions b/‎camel/tasks/task.py‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎camel/toolkits/video_analysis_toolkit.py‎
Lines changed: 1 addition & 1 deletion b/‎camel/toolkits/video_analysis_toolkit.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎camel/utils/token_counting.py‎
Lines changed: 25 additions & 17 deletions b/‎camel/utils/token_counting.py‎
Lines changed: 25 additions & 17 deletions
diff --git a/‎docs/conf.py‎
Lines changed: 1 addition & 1 deletion b/‎docs/conf.py‎
Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ body:
     attributes:
       label: What version of camel are you using?
       description: Run command `python3 -c 'print(__import__("camel").__version__)'` in your shell and paste the output here.
-      placeholder: E.g., 0.2.76a10
+      placeholder: E.g., 0.2.76a11
     validations:
       required: true
 
 
@@ -14,7 +14,7 @@
 
 from camel.logger import disable_logging, enable_logging, set_log_level
 
-__version__ = '0.2.76a10'
+__version__ = '0.2.76a11'
 
 __all__ = [
     '__version__',
 
@@ -94,6 +94,42 @@ def from_dict(cls, record_dict: Dict[str, Any]) -> "MemoryRecord":
         if "role_type" in data and isinstance(data["role_type"], str):
             data["role_type"] = RoleType(data["role_type"])
 
+        # Deserialize image_list: base64 payloads become PIL Images, while
+        # URL entries are kept as plain strings
+        if "image_list" in data and data["image_list"] is not None:
+            import base64
+            from io import BytesIO
+
+            from PIL import Image
+
+            image_objects = []
+            for img_item in data["image_list"]:
+                if isinstance(img_item, dict):
+                    # New format with type indicator
+                    if img_item["type"] == "url":
+                        # URL string, keep as-is
+                        image_objects.append(img_item["data"])
+                    else:  # type == "base64"
+                        # Base64 encoded image, convert to PIL Image
+                        img_bytes = base64.b64decode(img_item["data"])
+                        img = Image.open(BytesIO(img_bytes))
+                        # Restore the format attribute if it was saved
+                        if "format" in img_item:
+                            img.format = img_item["format"]
+                        image_objects.append(img)
+                else:
+                    # Legacy format: assume it's a base64 string
+                    img_bytes = base64.b64decode(img_item)
+                    img = Image.open(BytesIO(img_bytes))
+                    image_objects.append(img)
+            data["image_list"] = image_objects
+
+        # Deserialize video_bytes from base64 string
+        if "video_bytes" in data and data["video_bytes"] is not None:
+            import base64
+
+            data["video_bytes"] = base64.b64decode(data["video_bytes"])
+
         # Get valid constructor parameters (cached)
         valid_params = cls._get_constructor_params(message_cls)
 
 
@@ -64,8 +64,9 @@ class BaseMessage:
         content (str): The content of the message.
         video_bytes (Optional[bytes]): Optional bytes of a video associated
             with the message. (default: :obj:`None`)
-        image_list (Optional[List[Image.Image]]): Optional list of PIL Image
-            objects associated with the message. (default: :obj:`None`)
+        image_list (Optional[List[Union[Image.Image, str]]]): Optional list of
+            PIL Image objects or image URLs (strings) associated with the
+            message. (default: :obj:`None`)
         image_detail (Literal["auto", "low", "high"]): Detail level of the
             images associated with the message. (default: :obj:`auto`)
         video_detail (Literal["auto", "low", "high"]): Detail level of the
@@ -80,7 +81,7 @@ class BaseMessage:
     content: str
 
     video_bytes: Optional[bytes] = None
-    image_list: Optional[List[Image.Image]] = None
+    image_list: Optional[List[Union[Image.Image, str]]] = None
     image_detail: Literal["auto", "low", "high"] = "auto"
     video_detail: Literal["auto", "low", "high"] = "auto"
     parsed: Optional[Union[BaseModel, dict]] = None
@@ -92,7 +93,7 @@ def make_user_message(
         content: str,
         meta_dict: Optional[Dict[str, str]] = None,
         video_bytes: Optional[bytes] = None,
-        image_list: Optional[List[Image.Image]] = None,
+        image_list: Optional[List[Union[Image.Image, str]]] = None,
         image_detail: Union[
             OpenAIVisionDetailType, str
         ] = OpenAIVisionDetailType.AUTO,
@@ -109,8 +110,9 @@ def make_user_message(
                 dictionary for the message.
             video_bytes (Optional[bytes]): Optional bytes of a video
                 associated with the message.
-            image_list (Optional[List[Image.Image]]): Optional list of PIL
-                Image objects associated with the message.
+            image_list (Optional[List[Union[Image.Image, str]]]): Optional list
+                of PIL Image objects or image URLs (strings) associated with
+                the message.
             image_detail (Union[OpenAIVisionDetailType, str]): Detail level of
                 the images associated with the message.
             video_detail (Union[OpenAIVisionDetailType, str]): Detail level of
@@ -137,7 +139,7 @@ def make_assistant_message(
         content: str,
         meta_dict: Optional[Dict[str, str]] = None,
         video_bytes: Optional[bytes] = None,
-        image_list: Optional[List[Image.Image]] = None,
+        image_list: Optional[List[Union[Image.Image, str]]] = None,
         image_detail: Union[
             OpenAIVisionDetailType, str
         ] = OpenAIVisionDetailType.AUTO,
@@ -154,8 +156,9 @@ def make_assistant_message(
                 dictionary for the message.
             video_bytes (Optional[bytes]): Optional bytes of a video
                 associated with the message.
-            image_list (Optional[List[Image.Image]]): Optional list of PIL
-                Image objects associated with the message.
+            image_list (Optional[List[Union[Image.Image, str]]]): Optional list
+                of PIL Image objects or image URLs (strings) associated with
+                the message.
             image_detail (Union[OpenAIVisionDetailType, str]): Detail level of
                 the images associated with the message.
             video_detail (Union[OpenAIVisionDetailType, str]): Detail level of
@@ -436,31 +439,64 @@ def to_openai_user_message(self) -> OpenAIUserMessage:
         )
         if self.image_list and len(self.image_list) > 0:
             for image in self.image_list:
-                if image.format is None:
-                    # Set default format to PNG as fallback
-                    image.format = 'PNG'
-
-                image_type: str = image.format.lower()
-                if image_type not in OpenAIImageType:
-                    raise ValueError(
-                        f"Image type {image.format} "
-                        f"is not supported by OpenAI vision model"
+                # Check if image is a URL string or PIL Image
+                if isinstance(image, str):
+                    # Image is a URL string
+                    hybrid_content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": image,
+                                "detail": self.image_detail,
+                            },
+                        }
                     )
-                with io.BytesIO() as buffer:
-                    image.save(fp=buffer, format=image.format)
-                    encoded_image = base64.b64encode(buffer.getvalue()).decode(
-                        "utf-8"
+                else:
+                    # Image is a PIL Image object
+                    if image.format is None:
+                        # Set default format to PNG as fallback
+                        image.format = 'PNG'
+
+                    image_type: str = image.format.lower()
+                    if image_type not in OpenAIImageType:
+                        raise ValueError(
+                            f"Image type {image.format} "
+                            f"is not supported by OpenAI vision model"
+                        )
+
+                    # Normalize the color mode before saving: JPEG cannot
+                    # store transparency, and PNG palette/LA goes to RGBA
+                    img_to_save = image
+                    if image.mode in ('RGBA', 'LA', 'P') and image_type in (
+                        'jpeg',
+                        'jpg',
+                    ):
+                        # JPEG doesn't support transparency, convert to RGB
+                        img_to_save = image.convert('RGB')
+                    elif (
+                        image.mode in ('RGBA', 'LA', 'P')
+                        and image_type == 'png'
+                    ):
+                        # For PNG with transparency, convert to RGBA if needed
+                        if image.mode in ('LA', 'P'):
+                            img_to_save = image.convert('RGBA')
+                        # else: RGBA mode, keep as-is
+
+                    with io.BytesIO() as buffer:
+                        img_to_save.save(fp=buffer, format=image.format)
+                        encoded_image = base64.b64encode(
+                            buffer.getvalue()
+                        ).decode("utf-8")
+                    image_prefix = f"data:image/{image_type};base64,"
+                    hybrid_content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"{image_prefix}{encoded_image}",
+                                "detail": self.image_detail,
+                            },
+                        }
                     )
-                image_prefix = f"data:image/{image_type};base64,"
-                hybrid_content.append(
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"{image_prefix}{encoded_image}",
-                            "detail": self.image_detail,
-                        },
-                    }
-                )
 
         if self.video_bytes:
             import imageio.v3 as iio
@@ -552,9 +588,66 @@ def to_dict(self) -> Dict:
         Returns:
             dict: The converted dictionary.
         """
-        return {
+        result = {
             "role_name": self.role_name,
             "role_type": self.role_type.value,
             **(self.meta_dict or {}),
             "content": self.content,
         }
+
+        # Include image/video fields if present
+        if self.image_list is not None:
+            # Handle both PIL Images and URL strings
+            import base64
+            from io import BytesIO
+
+            image_data_list = []
+            for img in self.image_list:
+                if isinstance(img, str):
+                    # Image is a URL string, store as-is
+                    image_data_list.append({"type": "url", "data": img})
+                else:
+                    # Image is a PIL Image, convert to base64
+                    # Preserve format, default to PNG if not set
+                    img_format = img.format if img.format else "PNG"
+
+                    # Handle transparency for different formats
+                    img_to_save = img
+                    if img.mode in (
+                        'RGBA',
+                        'LA',
+                        'P',
+                    ) and img_format.upper() in ('JPEG', 'JPG'):
+                        # JPEG doesn't support transparency, convert to RGB
+                        img_to_save = img.convert('RGB')
+                    elif (
+                        img.mode in ('LA', 'P') and img_format.upper() == 'PNG'
+                    ):
+                        # For PNG with transparency, convert to RGBA if needed
+                        img_to_save = img.convert('RGBA')
+                    # else: keep as-is for other combinations
+
+                    buffered = BytesIO()
+                    img_to_save.save(buffered, format=img_format)
+                    img_str = base64.b64encode(buffered.getvalue()).decode()
+                    image_data_list.append(
+                        {
+                            "type": "base64",
+                            "data": img_str,
+                            "format": img_format,  # Preserve format
+                        }
+                    )
+            result["image_list"] = image_data_list
+
+        if self.video_bytes is not None:
+            import base64
+
+            result["video_bytes"] = base64.b64encode(self.video_bytes).decode()
+
+        if self.image_detail is not None:
+            result["image_detail"] = self.image_detail
+
+        if self.video_detail is not None:
+            result["video_detail"] = self.video_detail
+
+        return result
@@ -237,8 +237,9 @@ class Task(BaseModel):
             (default: :obj:`[]`)
         additional_info (Optional[Dict[str, Any]]): Additional information for
             the task. (default: :obj:`None`)
-        image_list (Optional[List[Image.Image]]): Optional list of PIL Image
-            objects associated with the task. (default: :obj:`None`)
+        image_list (Optional[List[Union[Image.Image, str]]]): Optional list
+            of PIL Image objects or image URLs (strings) associated with the
+            task. (default: :obj:`None`)
         image_detail (Literal["auto", "low", "high"]): Detail level of the
             images associated with the task. (default: :obj:`auto`)
         video_bytes (Optional[bytes]): Optional bytes of a video associated
@@ -271,7 +272,7 @@ class Task(BaseModel):
 
     additional_info: Optional[Dict[str, Any]] = None
 
-    image_list: Optional[List[Image.Image]] = None
+    image_list: Optional[List[Union[Image.Image, str]]] = None
 
     image_detail: Literal["auto", "low", "high"] = "auto"
 
 
@@ -600,7 +600,7 @@ def ask_question_about_video(
             msg = BaseMessage.make_user_message(
                 role_name="User",
                 content=prompt,
-                image_list=video_frames,
+                image_list=video_frames,  # type: ignore[arg-type]
             )
             # Reset the agent to clear previous state
             self.vl_agent.reset()
 
@@ -195,24 +195,32 @@ def count_tokens_from_messages(self, messages: List[OpenAIMessage]) -> int:
                             image_str: str = item["image_url"]["url"]
                             detail = item["image_url"]["detail"]
 
-                            image_prefix_format = "data:image/{};base64,"
-                            image_prefix: Optional[str] = None
-                            for image_type in list(OpenAIImageType):
-                                # Find the correct image format
-                                image_prefix = image_prefix_format.format(
-                                    image_type.value
+                            # Only count tokens for base64 encoded images
+                            # For URLs, we cannot reliably determine token count without fetching the image
+                            if image_str.startswith("data:image"):
+                                # Base64 encoded image
+                                image_prefix_format = "data:image/{};base64,"
+                                image_prefix: Optional[str] = None
+                                for image_type in list(OpenAIImageType):
+                                    # Find the correct image format
+                                    image_prefix = image_prefix_format.format(
+                                        image_type.value
+                                    )
+                                    if image_prefix in image_str:
+                                        break
+                                assert isinstance(image_prefix, str)
+                                encoded_image = image_str.split(image_prefix)[
+                                    1
+                                ]
+                                image_bytes = BytesIO(
+                                    base64.b64decode(encoded_image)
                                 )
-                                if image_prefix in image_str:
-                                    break
-                            assert isinstance(image_prefix, str)
-                            encoded_image = image_str.split(image_prefix)[1]
-                            image_bytes = BytesIO(
-                                base64.b64decode(encoded_image)
-                            )
-                            image = Image.open(image_bytes)
-                            num_tokens += self._count_tokens_from_image(
-                                image, OpenAIVisionDetailType(detail)
-                            )
+                                image = Image.open(image_bytes)
+                                num_tokens += self._count_tokens_from_image(
+                                    image, OpenAIVisionDetailType(detail)
+                                )
+                            # Note: For regular URLs, token count cannot be determined without fetching the image
+                            # The actual token usage will be reported by the API response
                 if key == "name":
                     num_tokens += self.tokens_per_name
 
 
@@ -27,7 +27,7 @@
 project = 'CAMEL'
 copyright = '2024, CAMEL-AI.org'
 author = 'CAMEL-AI.org'
-release = '0.2.76a10'
+release = '0.2.76a11'
 
 html_favicon = (
     'https://raw.githubusercontent.com/camel-ai/camel/master/misc/favicon.png'
Original file line number	Diff line number	Diff line change
`@@ -600,7 +600,7 @@ def ask_question_about_video(`
`600`	`600`	`msg = BaseMessage.make_user_message(`
`601`	`601`	`role_name="User",`
`602`	`602`	`content=prompt,`
`603`		`- image_list=video_frames,`
	`603`	`+ image_list=video_frames, # type: ignore[arg-type]`
`604`	`604`	`)`
`605`	`605`	`# Reset the agent to clear previous state`
`606`	`606`	`self.vl_agent.reset()`