From d102a45bbf6bb823cf7f1f37833f240d26e65d43 Mon Sep 17 00:00:00 2001
From: saurabh
Date: Thu, 6 Mar 2025 15:51:30 +0530
Subject: [PATCH 1/4] New samples added to use the auto_reduce flag

---
 ...with_summary_history_reducer_autoreduce.py | 174 ++++++++++++++++++
 ...h_truncation_history_reducer_autoreduce.py | 168 +++++++++++++++++
 2 files changed, 342 insertions(+)
 create mode 100644 python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_autoreduce.py
 create mode 100644 python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer_autoreduce.py

diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_autoreduce.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_autoreduce.py
new file mode 100644
index 000000000000..6cee1d2f2b6a
--- /dev/null
+++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_autoreduce.py
@@ -0,0 +1,174 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import (
+    Services,
+    get_chat_completion_service_and_request_settings,
+)
+from semantic_kernel import Kernel
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.contents import ChatHistorySummarizationReducer
+from semantic_kernel.core_plugins.time_plugin import TimePlugin
+from semantic_kernel.functions import KernelArguments
+
+# This sample shows how to create a chatbot using a kernel function and leverage a chat history
+# summarization reducer.
+# This sample uses the following main components:
+# - a ChatCompletionService: This component is responsible for generating responses to user messages.
+# - a Chat History Reducer: This component is responsible for keeping track of and reducing the chat history.
+#                           A Chat History Reducer is a subclass of ChatHistory that provides additional
+#                           functionality to reduce the history.
+# - a KernelFunction: This function will be a prompt function, meaning the function is composed of
+#                     a prompt and will be invoked by Semantic Kernel.
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose.
+
+# [NOTE]
+# The purpose of this sample is to demonstrate how to use a kernel function together with a chat history reducer.
+# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly.
+
+# Toggle this flag to view the chat history summary after a reduction has been performed.
+view_chat_history_summary_after_reduction = True
+
+# You can select from the following chat completion services:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# This is the system message that gives the chatbot its personality.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+# Create a kernel and register a prompt function.
+# The prompt here contains two variables: chat_history and user_input.
+# They will be replaced by the kernel with the actual values when the function is invoked.
+# [NOTE]
+# The chat_history, which is a ChatHistory object, will be serialized to a string internally
+# to create/render the final prompt.
+# Since this sample uses a chat completion service, the prompt will be deserialized back to
+# a ChatHistory object that gets passed to the chat completion service. This new chat history
+# object will contain the original messages and the user input.
+kernel = Kernel()
+chat_function = kernel.add_function(
+    plugin_name="ChatBot",
+    function_name="Chat",
+    prompt="{{$chat_history}}{{$user_input}}",
+    template_format="semantic-kernel",
+    # You can attach the request settings to the function or
+    # pass the settings to the kernel.invoke method via the kernel arguments.
+    # If you specify the settings in both places, the settings in the kernel arguments will
+    # take precedence given the same service id.
+    # prompt_execution_settings=request_settings,
+)
+
+# Invoking a kernel function requires a service, so we add the chat completion service to the kernel.
+kernel.add_service(chat_completion_service)
+
+# The chat history reducer is responsible for summarizing the chat history.
+# It's a subclass of ChatHistory that provides additional functionality to reduce the history.
+# You may use it just like a regular ChatHistory object.
+summarization_reducer = ChatHistorySummarizationReducer(
+    service=kernel.get_service(),
+    # target_count:
+    # Purpose: Defines the target number of messages to retain after applying summarization.
+    # What it controls: This parameter determines how much of the most recent conversation history
+    # is preserved while discarding or summarizing older messages.
+    # Why change it?:
+    # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history
+    #   to maintain context.
+    # - Larger values: Use when retaining more conversational context is critical for accurate responses
+    #   or maintaining a richer dialogue.
+    target_count=3,
+    # threshold_count:
+    # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds
+    # target_count by a small margin.
+    # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response)
+    # are not "orphaned" or lost during truncation or summarization.
+    # Why change it?:
+    # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+    #   pairs of messages sooner.
+    # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+    #   especially for sensitive interactions like API function calls or complex responses.
+    threshold_count=2,
+    # auto_reduce:
+    # Purpose: Automatically summarizes the chat history after adding a new message using the method add_message_async.
+    # What it controls: When enabled, the reducer will automatically summarize the chat history after adding a new message using the method add_message_async.
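+    # [NOTE] For reference, a minimal sketch of the manual alternative (auto_reduce left at False),
+    # assuming you add messages yourself and reduce on demand:
+    #     summarization_reducer.add_message(message)
+    #     await summarization_reducer.reduce()
+    # This sample instead enables auto_reduce and relies on add_message_async to reduce for you.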
+    auto_reduce=True,
+)
+
+summarization_reducer.add_system_message(system_message)
+
+kernel.add_plugin(plugin=TimePlugin(), plugin_name="TimePlugin")
+
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
+
+
+async def chat() -> bool:
+    try:
+        user_input = input("User:> ")
+    except (KeyboardInterrupt, EOFError):
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    kernel_arguments = KernelArguments(
+        settings=request_settings,
+        chat_history=summarization_reducer,
+        user_input=user_input,
+    )
+    answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments)
+
+    if answer:
+        print(f"Mosscap:> {answer}")
+        summarization_reducer.add_user_message(user_input)
+        # If the summarization reducer is set to auto_reduce, the reducer will automatically summarize the chat history
+        # after adding a new message using the method add_message_async.
+        # If auto_reduce is disabled, you can manually summarize the chat history using the method reduce.
+        await summarization_reducer.add_message_async(answer.value[0])
+
+    print(f"Current number of messages: {len(summarization_reducer.messages)}")
+    for msg in summarization_reducer.messages:
+        if view_chat_history_summary_after_reduction and msg.metadata and msg.metadata.get("__summary__"):
+            print("*" * 60)
+            print("Summary detected:", msg.content)
+            print("*" * 60)
+
+
+    print("\n")
+
+    return True
+
+
+async def main() -> None:
+    # Start the chat loop. The chat loop will continue until the user types "exit".
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+    # Sample output:
+    # User:> Why is the sky blue in one sentence?
+    # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere,
+    # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more
+    # prominent in our visual perception.
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer_autoreduce.py b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer_autoreduce.py
new file mode 100644
index 000000000000..53451d6e27dd
--- /dev/null
+++ b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer_autoreduce.py
@@ -0,0 +1,168 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from samples.concepts.setup.chat_completion_services import (
+    Services,
+    get_chat_completion_service_and_request_settings,
+)
+from semantic_kernel import Kernel
+from semantic_kernel.contents import ChatHistoryTruncationReducer
+from semantic_kernel.functions import KernelArguments
+
+# This sample shows how to create a chatbot using a kernel function and leverage a chat history
+# truncation reducer.
+# This sample uses the following three main components:
+# - a ChatCompletionService: This component is responsible for generating responses to user messages.
+# - a Chat History Reducer: This component is responsible for keeping track of and reducing the chat history.
+#                           A Chat History Reducer is a subclass of ChatHistory that provides additional
+#                           functionality to reduce the history.
+# - a KernelFunction: This function will be a prompt function, meaning the function is composed of
+#                     a prompt and will be invoked by Semantic Kernel.
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose.
+
+# [NOTE]
+# The purpose of this sample is to demonstrate how to use a kernel function together with a chat history reducer.
+# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly.
+
+# You can select from the following chat completion services:
+# - Services.OPENAI
+# - Services.AZURE_OPENAI
+# - Services.AZURE_AI_INFERENCE
+# - Services.ANTHROPIC
+# - Services.BEDROCK
+# - Services.GOOGLE_AI
+# - Services.MISTRAL_AI
+# - Services.OLLAMA
+# - Services.ONNX
+# - Services.VERTEX_AI
+# Please make sure you have configured your environment correctly for the selected chat completion service.
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)
+
+# This is the system message that gives the chatbot its personality.
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+# Create a kernel and register a prompt function.
+# The prompt here contains two variables: chat_history and user_input.
+# They will be replaced by the kernel with the actual values when the function is invoked.
+# [NOTE]
+# The chat_history, which is a ChatHistory object, will be serialized to a string internally
+# to create/render the final prompt.
+# Since this sample uses a chat completion service, the prompt will be deserialized back to
+# a ChatHistory object that gets passed to the chat completion service. This new chat history
+# object will contain the original messages and the user input.
+kernel = Kernel()
+chat_function = kernel.add_function(
+    plugin_name="ChatBot",
+    function_name="Chat",
+    prompt="{{$chat_history}}{{$user_input}}",
+    template_format="semantic-kernel",
+    # You can attach the request settings to the function or
+    # pass the settings to the kernel.invoke method via the kernel arguments.
+    # If you specify the settings in both places, the settings in the kernel arguments will
+    # take precedence given the same service id.
+    # prompt_execution_settings=request_settings,
+)
+
+# Invoking a kernel function requires a service, so we add the chat completion service to the kernel.
+kernel.add_service(chat_completion_service)
+
+# The chat history reducer is responsible for truncating the chat history.
+# It's a subclass of ChatHistory that provides additional functionality to reduce the history.
+# You may use it just like a regular ChatHistory object.
+truncation_reducer = ChatHistoryTruncationReducer(
+    service=kernel.get_service(),
+    # target_count:
+    # Purpose: Defines the target number of messages to retain after applying truncation.
+    # What it controls: This parameter determines how much of the most recent conversation history
+    # is preserved while discarding older messages.
+    # Why change it?:
+    # - Smaller values: Use when memory constraints are tight, or the assistant only needs a brief history
+    #   to maintain context.
+    # - Larger values: Use when retaining more conversational context is critical for accurate responses
+    #   or maintaining a richer dialogue.
+    target_count=3,
+    # threshold_count:
+    # Purpose: Acts as a buffer to avoid reducing history prematurely when the current message count exceeds
+    # target_count by a small margin.
+    # What it controls: Helps ensure that essential paired messages (like a user query and the assistant’s response)
+    # are not "orphaned" or lost during truncation.
+    # Why change it?:
+    # - Smaller values: Use when you want stricter reduction criteria and are okay with possibly cutting older
+    #   pairs of messages sooner.
+    # - Larger values: Use when you want to minimize the risk of cutting a critical part of the conversation,
+    #   especially for sensitive interactions like API function calls or complex responses.
+    threshold_count=2,
+    # auto_reduce:
+    # Purpose: Automatically truncates the chat history after adding a new message using the method add_message_async.
+    # What it controls: When enabled, the reducer will automatically truncate the chat history after adding a new message using the method add_message_async.
+    auto_reduce=True,
+)
+
+truncation_reducer.add_system_message(system_message)
+
+
+async def chat() -> bool:
+    try:
+        user_input = input("User:> ")
+    except KeyboardInterrupt:
+        print("\n\nExiting chat...")
+        return False
+    except EOFError:
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    # Attempt to reduce before adding the user message to the chat history.
+    # (With auto_reduce enabled, add_message_async already keeps the history reduced,
+    # so this explicit call is usually a no-op and is kept here for illustration.)
+    await truncation_reducer.reduce()
+
+    # Get the chat message content from the chat completion service.
+    kernel_arguments = KernelArguments(
+        settings=request_settings,
+        # Use keyword arguments to pass the chat history and user input to the kernel function.
+        chat_history=truncation_reducer,
+        user_input=user_input,
+    )
+
+    answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments)
+    # Alternatively, you can invoke the function directly with the kernel as an argument:
+    # answer = await chat_function.invoke(kernel, kernel_arguments)
+    if answer:
+        print(f"Mosscap:> {answer}")
+        # Since the user_input is rendered by the template, it is not yet part of the chat history, so we add it here.
+        truncation_reducer.add_user_message(user_input)
+        # If the truncation reducer is set to auto_reduce, the reducer will automatically truncate the chat history
+        # after adding a new message using the method add_message_async.
+        # If auto_reduce is disabled, you can manually truncate the chat history using the method reduce.
+        await truncation_reducer.add_message_async(answer.value[0])
+    
+    print(f"Current number of messages: {len(truncation_reducer.messages)}")
+
+    return True
+
+
+async def main() -> None:
+    # Start the chat loop. The chat loop will continue until the user types "exit".
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+    # Sample output:
+    # User:> Why is the sky blue in one sentence?
+    # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere,
+    # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more
+    # prominent in our visual perception.
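+    # [NOTE] With auto_reduce enabled, the message count printed each turn should settle near
+    # target_count once the conversation grows, since add_message_async keeps trimming the
+    # history (subject to the threshold_count buffer described above).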
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

From d263e81950b74f3db7bfce777561ff74da337879 Mon Sep 17 00:00:00 2001
From: saurabh
Date: Fri, 7 Mar 2025 08:06:17 +0530
Subject: [PATCH 2/4] Updated the README.md to include links to
 simple_chatbot_with_summary_history_reducer_autoreduce.py and
 simple_chatbot_with_truncation_history_reducer_autoreduce.py

---
 python/samples/concepts/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/samples/concepts/README.md b/python/samples/concepts/README.md
index 72fe6258f876..205fed6b0cdf 100644
--- a/python/samples/concepts/README.md
+++ b/python/samples/concepts/README.md
@@ -85,6 +85,8 @@
 - [Simple Chatbot with Summary History Reducer Keeping Function Content](./chat_completion/simple_chatbot_with_summary_history_reducer_keep_func_content.py)
 - [Simple Chatbot with Summary History Reducer](./chat_completion/simple_chatbot_with_summary_history_reducer.py)
 - [Simple Chatbot with Truncation History Reducer](./chat_completion/simple_chatbot_with_truncation_history_reducer.py)
+- [Simple Chatbot with Summary History Reducer using Auto Reduce](./chat_completion/simple_chatbot_with_summary_history_reducer_autoreduce.py)
+- [Simple Chatbot with Truncation History Reducer using Auto Reduce](./chat_completion/simple_chatbot_with_truncation_history_reducer_autoreduce.py)
 
 ### ChatHistory
 - Using and serializing the [`ChatHistory`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/contents/chat_history.py)

From 961060c73161da60ab03ca827a97f2cba567b854 Mon Sep 17 00:00:00 2001
From: saurabh
Date: Fri, 7 Mar 2025 09:03:57 +0530
Subject: [PATCH 3/4] fix end of file

---
 python/uv.lock | 72 ++++++++++++++++++++++----------------------------
 1 file changed, 31 insertions(+), 41 deletions(-)

diff --git a/python/uv.lock b/python/uv.lock
index 54743f36699e..24851a408b66 100644
--- a/python/uv.lock
+++ b/python/uv.lock
@@ -1,4 +1,5 @@
 version = 1
+revision = 1
 requires-python = ">=3.10"
 resolution-markers = [
     "python_full_version < '3.11' and sys_platform == 'darwin'",
@@ -587,7 +588,7 @@ name = "build"
 version = "1.2.2.post1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "(os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'linux') or (os_name == 'nt' and sys_platform == 'win32')" },
+    { name = "colorama", marker = "os_name == 'nt' and sys_platform == 'win32'" },
     { name = "importlib-metadata", marker = "(python_full_version < '3.10.2' and sys_platform == 'darwin') or (python_full_version < '3.10.2' and sys_platform == 'linux') or (python_full_version < '3.10.2' and sys_platform == 'win32')" },
     { name = "packaging", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
     { name = "pyproject-hooks", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
@@ -830,7 +831,7 @@ name = "click"
 version = "8.1.8"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "(platform_system == 'Windows' and sys_platform == 'darwin') or (platform_system == 'Windows' and sys_platform == 'linux') or (platform_system == 'Windows' and sys_platform == 'win32')" },
+    { name = "colorama", marker = "sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 }
 wheels = [
@@ -2015,7 +2016,7 @@ name = "ipykernel"
 version = "6.29.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "appnope", marker = "(platform_system == 'Darwin' and sys_platform == 'darwin') or (platform_system == 'Darwin' and sys_platform == 'linux') or (platform_system == 'Darwin' and sys_platform == 'win32')" },
+    { name = "appnope", marker = "sys_platform == 'darwin'" },
     { name = "comm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
     { name = "debugpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
     { name = "ipython", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
@@ -2860,7 +2861,6 @@ name = "nvidia-cublas-cu12"
 version = "12.4.5.8"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/7f/7f/7fbae15a3982dc9595e49ce0f19332423b260045d0a6afe93cdbe2f1f624/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3", size = 363333771 },
     { url = "https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805 },
 ]
 
@@ -2869,7 +2869,6 @@ name = "nvidia-cuda-cupti-cu12"
 version = "12.4.127"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/93/b5/9fb3d00386d3361b03874246190dfec7b206fd74e6e287b26a8fcb359d95/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a", size = 12354556 },
     { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957 },
 ]
 
@@ -2878,7 +2877,6 @@ name = "nvidia-cuda-nvrtc-cu12"
 version = "12.4.127"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/77/aa/083b01c427e963ad0b314040565ea396f914349914c298556484f799e61b/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198", size = 24133372 },
     { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306 },
 ]
 
@@ -2887,7 +2885,6 @@ name = "nvidia-cuda-runtime-cu12"
 version = "12.4.127"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a1/aa/b656d755f474e2084971e9a297def515938d56b466ab39624012070cb773/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3", size = 894177 },
     { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737 },
 ]
 
@@ -2896,7 +2893,7 @@ name = "nvidia-cudnn-cu12"
 version = "9.1.0.70"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 },
@@ -2907,10 +2904,9 @@ name = "nvidia-cufft-cu12"
 version = "11.2.1.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/7a/8a/0e728f749baca3fbeffad762738276e5df60851958be7783af121a7221e7/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399", size = 211422548 },
     { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 },
 ]
 
@@ -2919,7 +2915,6 @@ name = "nvidia-curand-cu12"
 version = "10.3.5.147"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/80/9c/a79180e4d70995fdf030c6946991d0171555c6edf95c265c6b2bf7011112/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9", size = 56314811 },
     { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206 },
 ]
 
@@ -2928,12 +2923,11 @@ name = "nvidia-cusolver-cu12"
 version = "11.6.1.9"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
-    { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" },
+    { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/46/6b/a5c33cf16af09166845345275c34ad2190944bcc6026797a39f8e0a282e0/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e", size = 127634111 },
     { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 },
 ]
 
@@ -2942,10 +2936,9 @@ name = "nvidia-cusparse-cu12"
 version = "12.3.1.170"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/96/a9/c0d2f83a53d40a4a41be14cea6a0bf9e668ffcf8b004bd65633f433050c0/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3", size = 207381987 },
     { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 },
 ]
 
@@ -2954,7 +2947,6 @@ name = "nvidia-cusparselt-cu12"
 version = "0.6.2"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/98/8e/675498726c605c9441cf46653bd29cb1b8666da1fb1469ffa25f67f20c58/nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:067a7f6d03ea0d4841c85f0c6f1991c5dda98211f6302cb83a4ab234ee95bef8", size = 149422781 },
     { url = "https://files.pythonhosted.org/packages/78/a8/bcbb63b53a4b1234feeafb65544ee55495e1bb37ec31b999b963cbccfd1d/nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:df2c24502fd76ebafe7457dbc4716b2fec071aabaed4fb7691a201cde03704d9", size = 150057751 },
 ]
 
@@ -2971,7 +2963,6 @@ name = "nvidia-nvjitlink-cu12"
 version = "12.4.127"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/02/45/239d52c05074898a80a900f49b1615d81c07fceadd5ad6c4f86a987c0bc4/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83", size = 20552510 },
     { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810 },
 ]
 
@@ -2980,7 +2971,6 @@ name = "nvidia-nvtx-cu12"
 version = "12.4.127"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/06/39/471f581edbb7804b39e8063d92fc8305bdc7a80ae5c07dbe6ea5c50d14a5/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3", size = 100417 },
     { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 },
 ]
 
@@ -3047,8 +3037,8 @@ name = "onnxruntime-genai"
 version = "0.6.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "numpy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
-    { name = "onnxruntime", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "numpy", marker = "(python_full_version < '3.13' and sys_platform == 'darwin') or (python_full_version < '3.13' and sys_platform == 'linux') or (python_full_version < '3.13' and sys_platform == 'win32')" },
+    { name = "onnxruntime", marker = "(python_full_version < '3.13' and sys_platform == 'darwin') or (python_full_version < '3.13' and sys_platform == 'linux') or (python_full_version < '3.13' and sys_platform == 'win32')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/5f/7f/3e1edde3318458aabdd6070c44bedc2caa913949530d90ec89c32c76a036/onnxruntime_genai-0.6.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:b820e20e438fc2679db24e432c5652e20a972709e4002210a46b4f6282fd57d4", size = 871347 },
@@ -3578,7 +3568,7 @@ name = "portalocker"
 version = "2.10.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pywin32", marker = "(platform_system == 'Windows' and sys_platform == 'darwin') or (platform_system == 'Windows' and sys_platform == 'linux') or (platform_system == 'Windows' and sys_platform == 'win32')" },
+    { name = "pywin32", marker = "sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 }
 wheels = [
@@ -5021,7 +5011,6 @@ wheels = [
 
 [[package]]
 name = "semantic-kernel"
-version = "1.22.1"
 source = { editable = "." }
 dependencies = [
     { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
@@ -5078,7 +5067,7 @@ hugging-face = [
     { name = "transformers", extra = ["torch"], marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 milvus = [
-    { name = "milvus", marker = "(platform_system != 'Windows' and sys_platform == 'darwin') or (platform_system != 'Windows' and sys_platform == 'linux') or (platform_system != 'Windows' and sys_platform == 'win32')" },
+    { name = "milvus", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "pymilvus", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 mistralai = [
@@ -5167,7 +5156,7 @@ requires-dist = [
     { name = "google-generativeai", marker = "extra == 'google'", specifier = "~=0.8" },
     { name = "ipykernel", marker = "extra == 'notebooks'", specifier = "~=6.29" },
     { name = "jinja2", specifier = "~=3.1" },
-    { name = "milvus", marker = "platform_system != 'Windows' and extra == 'milvus'", specifier = ">=2.3,<2.3.8" },
+    { name = "milvus", marker = "sys_platform != 'win32' and extra == 'milvus'", specifier = ">=2.3,<2.3.8" },
     { name = "mistralai", marker = "extra == 'mistralai'", specifier = ">=1.2,<2.0" },
     { name = "motor", marker = "extra == 'mongo'", specifier = ">=3.3.2,<3.8.0" },
     { name = "nest-asyncio", specifier = "~=1.6" },
@@ -5201,6 +5190,7 @@ requires-dist = [
     { name = "weaviate-client", marker = "extra == 'weaviate'", specifier = ">=4.10,<5.0" },
     { name = "websockets", marker = "extra == 'realtime'", specifier = ">=13,<15" },
 ]
+provides-extras = ["anthropic", "autogen", "aws", "azure", "chroma", "dapr", "google", "hugging-face", "milvus", "mistralai", "mongo", "notebooks", "ollama", "onnx", "pandas", "pinecone", "postgres", "qdrant", "realtime", "redis", "usearch", "weaviate"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -5602,22 +5592,22 @@ dependencies = [
     { name = "fsspec", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
     { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
     { name = "networkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
-    { name = "nvidia-cublas-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-cuda-cupti-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-cuda-nvrtc-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-cuda-runtime-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-cudnn-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-cufft-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-curand-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-cusolver-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-cusparse-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-cusparselt-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-nccl-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
-    { name = "nvidia-nvtx-cu12", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
+    { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
     { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform == 'darwin') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')" },
     { name = "sympy", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
-    { name = "triton", marker = "(platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'darwin') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_system == 'Linux' and sys_platform == 'win32')" },
+    { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
     { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
@@ -5662,7 +5652,7 @@ name = "tqdm"
 version = "4.67.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "(platform_system == 'Windows' and sys_platform == 'darwin') or (platform_system == 'Windows' and sys_platform == 'linux') or (platform_system == 'Windows' and sys_platform == 'win32')" },
+    { name = "colorama", marker = "sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
 wheels = [

From 6649a4d6bda685bb0145e9e2cf15b1f61884d5fd Mon Sep 17 00:00:00 2001
From: saurabh
Date: Fri, 7 Mar 2025 09:30:14 +0530
Subject: [PATCH 4/4] Fix for E501 Line too long

---
 ...simple_chatbot_with_summary_history_reducer_autoreduce.py | 4 ++--
 ...ple_chatbot_with_truncation_history_reducer_autoreduce.py | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_autoreduce.py b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_autoreduce.py
index 6cee1d2f2b6a..5b7f3f681d4c 100644
--- a/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_autoreduce.py
+++ b/python/samples/concepts/chat_completion/simple_chatbot_with_summary_history_reducer_autoreduce.py
@@ -107,7 +107,8 @@
     threshold_count=2,
     # auto_reduce:
     # Purpose: Automatically summarizes the chat history after adding a new message using the method add_message_async.
-    # What it controls: When enabled, the reducer will automatically summarize the chat history after adding a new message using the method add_message_async.
+    # What it controls: When enabled, the reducer will automatically summarize the chat history
+    # after adding a new message using the method add_message_async.
     auto_reduce=True,
 )
 
@@ -150,7 +151,6 @@ async def chat() -> bool:
             print("*" * 60)
             print("Summary detected:", msg.content)
             print("*" * 60)
-
 
     print("\n")
 
diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer_autoreduce.py b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer_autoreduce.py
index 53451d6e27dd..e5c363e246b6 100644
--- a/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer_autoreduce.py
+++ b/python/samples/concepts/chat_completion/simple_chatbot_with_truncation_history_reducer_autoreduce.py
@@ -102,7 +102,8 @@
     threshold_count=2,
     # auto_reduce:
     # Purpose: Automatically truncates the chat history after adding a new message using the method add_message_async.
-    # What it controls: When enabled, the reducer will automatically truncate the chat history after adding a new message using the method add_message_async.
+    # What it controls: When enabled, the reducer will automatically truncate the chat history
+    # after adding a new message using the method add_message_async.
     auto_reduce=True,
 )
 
@@ -145,7 +146,7 @@
     # after adding a new message using the method add_message_async.
     # If auto_reduce is disabled, you can manually truncate the chat history using the method reduce.
     await truncation_reducer.add_message_async(answer.value[0])
-    
+
     print(f"Current number of messages: {len(truncation_reducer.messages)}")
 
     return True