From 63868db2b9219309643f229d2e0fc1b36e85b2ff Mon Sep 17 00:00:00 2001 From: Chinedum Echeta <60179183+cecheta@users.noreply.github.com> Date: Thu, 16 May 2024 09:08:12 +0000 Subject: [PATCH 1/6] Minor fixes from previous PR --- .../batch/utilities/helpers/embedders/push_embedder.py | 1 - code/tests/utilities/helpers/test_push_embedder.py | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/code/backend/batch/utilities/helpers/embedders/push_embedder.py b/code/backend/batch/utilities/helpers/embedders/push_embedder.py index 7ab2ac29d..fff5b2152 100644 --- a/code/backend/batch/utilities/helpers/embedders/push_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/push_embedder.py @@ -62,7 +62,6 @@ def __embed( image_vectors = self.azure_computer_vision_client.vectorize_image( source_url ) - logger.info("Image vectors: " + str(image_vectors)) documents_to_upload.append( self.__create_image_document(source_url, image_vectors) diff --git a/code/tests/utilities/helpers/test_push_embedder.py b/code/tests/utilities/helpers/test_push_embedder.py index 48f5a7b0a..add8b23cd 100644 --- a/code/tests/utilities/helpers/test_push_embedder.py +++ b/code/tests/utilities/helpers/test_push_embedder.py @@ -338,10 +338,8 @@ def test_embed_file_raises_exception_on_failure( # given push_embedder = PushEmbedder(MagicMock(), MagicMock()) - successful_indexing_result = MagicMock() - successful_indexing_result.succeeded = True - failed_indexing_result = MagicMock() - failed_indexing_result.succeeded = False + successful_indexing_result = MagicMock(succeeded=True) + failed_indexing_result = MagicMock(succeeded=False) azure_search_helper_mock.return_value.get_search_client.return_value.upload_documents.return_value = [ successful_indexing_result, failed_indexing_result, From 8310ce9509864f79960c36976422ad6422b0094c Mon Sep 17 00:00:00 2001 From: Chinedum Echeta <60179183+cecheta@users.noreply.github.com> Date: Thu, 16 May 2024 09:32:49 +0000 Subject: [PATCH 2/6] Get image caption from LLM --- .../helpers/embedders/push_embedder.py | 23 +++++++++++++++++++ .../batch/utilities/helpers/env_helper.py | 1 + .../batch/utilities/helpers/llm_helper.py | 5 ++-- infra/main.bicep | 2 +- infra/main.bicepparam | 2 +- infra/main.json | 4 ++-- 6 files changed, 31 insertions(+), 6 deletions(-) diff --git a/code/backend/batch/utilities/helpers/embedders/push_embedder.py b/code/backend/batch/utilities/helpers/embedders/push_embedder.py index fff5b2152..25e8a4495 100644 --- a/code/backend/batch/utilities/helpers/embedders/push_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/push_embedder.py @@ -24,6 +24,7 @@ class PushEmbedder(EmbedderBase): def __init__(self, blob_client: AzureBlobStorageClient, env_helper: EnvHelper): + self.env_helper = env_helper self.llm_helper = LLMHelper() self.azure_search_helper = AzureSearchHelper() self.azure_computer_vision_client = AzureComputerVisionClient(env_helper) @@ -63,6 +64,28 @@ def __embed( source_url ) + model = self.env_helper.AZURE_OPENAI_VISION_MODEL + caption_system_message = """You are a assistant that generates rich descriptions of images. +You need to be accurate in the information you extract and detailed in the descriptons you generate. +Do not abbreviate anything and do not shorten sentances. Explain the image completely. +If you are provided with an image of a flow chart, describe the flow chart in detail. 
+If the image is mostly text, use OCR to extract the text as it is displayed in the image.""" + + messages = [ + {"role": "system", "content": caption_system_message}, + { + "role": "user", + "content": [ + {"text": "Describe this image in detail", "type": "text"}, + {"image_url": source_url, "type": "image_url"}, + ], + }, + ] + + response = self.llm_helper.get_chat_completion(messages, model) + + # Remember to limit size of caption + documents_to_upload.append( self.__create_image_document(source_url, image_vectors) ) diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py index 138ecd890..b860a79c2 100644 --- a/code/backend/batch/utilities/helpers/env_helper.py +++ b/code/backend/batch/utilities/helpers/env_helper.py @@ -86,6 +86,7 @@ def __load_config(self, **kwargs) -> None: self.AZURE_OPENAI_MODEL_NAME = os.getenv( "AZURE_OPENAI_MODEL_NAME", "gpt-35-turbo" ) + self.AZURE_OPENAI_VISION_MODEL = os.getenv("AZURE_OPENAI_VISION_MODEL", "gpt-4") self.AZURE_OPENAI_TEMPERATURE = os.getenv("AZURE_OPENAI_TEMPERATURE", "0") self.AZURE_OPENAI_TOP_P = os.getenv("AZURE_OPENAI_TOP_P", "1.0") self.AZURE_OPENAI_MAX_TOKENS = os.getenv("AZURE_OPENAI_MAX_TOKENS", "1000") diff --git a/code/backend/batch/utilities/helpers/llm_helper.py b/code/backend/batch/utilities/helpers/llm_helper.py index 8c6084033..685cf85b7 100644 --- a/code/backend/batch/utilities/helpers/llm_helper.py +++ b/code/backend/batch/utilities/helpers/llm_helper.py @@ -29,6 +29,7 @@ def __init__(self): ) self.llm_model = self.env_helper.AZURE_OPENAI_MODEL + self.llm_vision_model = self.env_helper.AZURE_OPENAI_VISION_MODEL self.llm_max_tokens = ( self.env_helper.AZURE_OPENAI_MAX_TOKENS if self.env_helper.AZURE_OPENAI_MAX_TOKENS != "" @@ -117,9 +118,9 @@ def get_chat_completion_with_functions( function_call=function_call, ) - def get_chat_completion(self, messages: list[dict]): + def get_chat_completion(self, messages: list[dict], model: str | None = None): return self.openai_client.chat.completions.create( - model=self.llm_model, + model=model or self.llm_model, messages=messages, ) diff --git a/infra/main.bicep b/infra/main.bicep index a0c3b6597..461fb0816 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -108,7 +108,7 @@ param azureOpenAIModelCapacity int = 30 param useAdvancedImageProcessing bool = false @description('Azure OpenAI Vision Model Deployment Name') -param azureOpenAIVisionModel string = 'gpt-4-vision' +param azureOpenAIVisionModel string = 'gpt-4' @description('Azure OpenAI Vision Model Name') param azureOpenAIVisionModelName string = 'gpt-4' diff --git a/infra/main.bicepparam b/infra/main.bicepparam index 2aaec96f4..e19c2656e 100644 --- a/infra/main.bicepparam +++ b/infra/main.bicepparam @@ -25,7 +25,7 @@ param azureOpenAIModelName = readEnvironmentVariable('AZURE_OPENAI_MODEL_NAME', param azureOpenAIModelVersion = readEnvironmentVariable('AZURE_OPENAI_MODEL_VERSION', '0613') param azureOpenAIModelCapacity = int(readEnvironmentVariable('AZURE_OPENAI_MODEL_CAPACITY', '30')) param useAdvancedImageProcessing = bool(readEnvironmentVariable('USE_ADVANCED_IMAGE_PROCESSING', 'false')) -param azureOpenAIVisionModel = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL', 'gpt-4-vision') +param azureOpenAIVisionModel = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL', 'gpt-4') param azureOpenAIVisionModelName = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_NAME', 'gpt-4') param azureOpenAIVisionModelVersion = 
readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_VERSION', 'vision-preview') param azureOpenAIVisionModelCapacity = int(readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_CAPACITY', '10')) diff --git a/infra/main.json b/infra/main.json index fec8cca5a..9404b20cc 100644 --- a/infra/main.json +++ b/infra/main.json @@ -5,7 +5,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "13373198886203455254" + "templateHash": "9021391279672164541" } }, "parameters": { @@ -224,7 +224,7 @@ }, "azureOpenAIVisionModel": { "type": "string", - "defaultValue": "gpt-4-vision", + "defaultValue": "gpt-4", "metadata": { "description": "Azure OpenAI Vision Model Deployment Name" } From 16c720ccc0c562fd24227401cf2071c5bfdf7121 Mon Sep 17 00:00:00 2001 From: Bhavana Hindupur Date: Thu, 16 May 2024 14:42:47 +0000 Subject: [PATCH 3/6] Generate and embed image caption --- .../helpers/embedders/push_embedder.py | 71 ++++++++------ code/tests/functional/app_config.py | 1 + code/tests/functional/conftest.py | 27 ++++++ .../test_advanced_image_processing.py | 92 ++++++++++++++++++- .../utilities/helpers/test_push_embedder.py | 60 ++++++++++-- 5 files changed, 213 insertions(+), 38 deletions(-) diff --git a/code/backend/batch/utilities/helpers/embedders/push_embedder.py b/code/backend/batch/utilities/helpers/embedders/push_embedder.py index 25e8a4495..a50e305c4 100644 --- a/code/backend/batch/utilities/helpers/embedders/push_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/push_embedder.py @@ -60,34 +60,15 @@ def __embed( in self.config.get_advanced_image_processing_image_types() ): logger.warning("Advanced image processing is not supported yet") - image_vectors = self.azure_computer_vision_client.vectorize_image( - source_url - ) - - model = self.env_helper.AZURE_OPENAI_VISION_MODEL - caption_system_message = """You are a assistant that generates rich descriptions of images. -You need to be accurate in the information you extract and detailed in the descriptons you generate. -Do not abbreviate anything and do not shorten sentances. Explain the image completely. -If you are provided with an image of a flow chart, describe the flow chart in detail. -If the image is mostly text, use OCR to extract the text as it is displayed in the image.""" - - messages = [ - {"role": "system", "content": caption_system_message}, - { - "role": "user", - "content": [ - {"text": "Describe this image in detail", "type": "text"}, - {"image_url": source_url, "type": "image_url"}, - ], - }, - ] - response = self.llm_helper.get_chat_completion(messages, model) - - # Remember to limit size of caption + caption = self.generate_image_caption(source_url) + caption_vector = self.llm_helper.generate_embeddings(caption) + image_vector = self.azure_computer_vision_client.vectorize_image(source_url) documents_to_upload.append( - self.__create_image_document(source_url, image_vectors) + self.__create_image_document( + source_url, image_vector, caption, caption_vector + ) ) else: documents: List[SourceDocument] = self.document_loading.load( @@ -107,6 +88,32 @@ def __embed( logger.error("Failed to upload documents to search index") raise Exception(response) + def generate_image_caption(self, source_url): + model = self.env_helper.AZURE_OPENAI_VISION_MODEL + caption_system_message = """You are a assistant that generates rich descriptions of images. +You need to be accurate in the information you extract and detailed in the descriptons you generate. +Do not abbreviate anything and do not shorten sentances. 
Explain the image completely. +If you are provided with an image of a flow chart, describe the flow chart in detail. +If the image is mostly text, use OCR to extract the text as it is displayed in the image.""" + + messages = [ + {"role": "system", "content": caption_system_message}, + { + "role": "user", + "content": [ + { + "text": "Describe this image in detail. Limit the response to 500 words.", + "type": "text", + }, + {"image_url": source_url, "type": "image_url"}, + ], + }, + ] + + response = self.llm_helper.get_chat_completion(messages, model) + caption = response.choices[0].message.content + return caption + def __convert_to_search_document(self, document: SourceDocument): embedded_content = self.llm_helper.generate_embeddings(document.content) metadata = { @@ -133,7 +140,13 @@ def __generate_document_id(self, source_url: str) -> str: hash_key = hashlib.sha1(f"{source_url}_1".encode("utf-8")).hexdigest() return f"doc_{hash_key}" - def __create_image_document(self, source_url: str, image_vectors: List[float]): + def __create_image_document( + self, + source_url: str, + image_vector: List[float], + content: str = "", + content_vector: List[float] = [], + ): parsed_url = urlparse(source_url) file_url = parsed_url.scheme + "://" + parsed_url.netloc + parsed_url.path @@ -149,9 +162,9 @@ def __create_image_document(self, source_url: str, image_vectors: List[float]): return { "id": document_id, - "content": "", - "content_vector": [], - "image_vector": image_vectors, + "content": content, + "content_vector": content_vector, + "image_vector": image_vector, "metadata": json.dumps( { "id": document_id, diff --git a/code/tests/functional/app_config.py b/code/tests/functional/app_config.py index b1c841c14..18837a4da 100644 --- a/code/tests/functional/app_config.py +++ b/code/tests/functional/app_config.py @@ -28,6 +28,7 @@ class AppConfig: "AZURE_OPENAI_MAX_TOKENS": "1000", "AZURE_OPENAI_MODEL": "some-openai-model", "AZURE_OPENAI_MODEL_NAME": "some-openai-model-name", + "AZURE_OPENAI_VISION_MODEL": "some-openai-vision-model", "AZURE_OPENAI_RESOURCE": "some-openai-resource", "AZURE_OPENAI_STREAM": "True", "AZURE_OPENAI_STOP_SEQUENCE": "", diff --git a/code/tests/functional/conftest.py b/code/tests/functional/conftest.py index 6e5e6408f..8f76a14e4 100644 --- a/code/tests/functional/conftest.py +++ b/code/tests/functional/conftest.py @@ -162,6 +162,33 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): } ) + httpserver.expect_request( + f"/openai/deployments/{app_config.get('AZURE_OPENAI_VISION_MODEL')}/chat/completions", + method="POST", + ).respond_with_json( + { + "id": "chatcmpl-6v7mkQj980V1yBec6ETrKPRqFjNw9", + "object": "chat.completion", + "created": 1679072642, + "model": app_config.get("AZURE_OPENAI_VISION_MODEL"), + "usage": { + "prompt_tokens": 58, + "completion_tokens": 68, + "total_tokens": 126, + }, + "choices": [ + { + "message": { + "role": "assistant", + "content": "This is a caption for the image", + }, + "finish_reason": "stop", + "index": 0, + } + ], + } + ) + httpserver.expect_request( f"/indexes('{app_config.get('AZURE_SEARCH_CONVERSATIONS_LOG_INDEX')}')/docs/search.index", method="POST", diff --git a/code/tests/functional/tests/functions/test_advanced_image_processing.py b/code/tests/functional/tests/functions/test_advanced_image_processing.py index 300ec4a7e..875d5422f 100644 --- a/code/tests/functional/tests/functions/test_advanced_image_processing.py +++ b/code/tests/functional/tests/functions/test_advanced_image_processing.py @@ -103,6 +103,9 
@@ def test_image_passed_to_computer_vision_to_generate_image_embeddings( RequestMatcher( path=COMPUTER_VISION_VECTORIZE_IMAGE_PATH, method=COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + json={ + "url": ANY, + }, query_string="api-version=2024-02-01&model-version=2023-04-15", headers={ "Content-Type": "application/json", @@ -115,7 +118,87 @@ def test_image_passed_to_computer_vision_to_generate_image_embeddings( )[0] assert request.get_json()["url"].startswith( - f"{app_config.get('AZURE_COMPUTER_VISION_ENDPOINT')}{app_config.get('AZURE_BLOB_CONTAINER_NAME')}/{FILE_NAME}" + f"{app_config.get('AZURE_STORAGE_ACCOUNT_ENDPOINT')}{app_config.get('AZURE_BLOB_CONTAINER_NAME')}/{FILE_NAME}" + ) + + +def test_image_passed_to_llm_to_generate_caption( + message: QueueMessage, httpserver: HTTPServer, app_config: AppConfig +): + # when + batch_push_results.build().get_user_function()(message) + + # then + request = verify_request_made( + mock_httpserver=httpserver, + request_matcher=RequestMatcher( + path=f"/openai/deployments/{app_config.get('AZURE_OPENAI_VISION_MODEL')}/chat/completions", + method="POST", + json={ + "messages": [ + { + "role": "system", + "content": """You are a assistant that generates rich descriptions of images. +You need to be accurate in the information you extract and detailed in the descriptons you generate. +Do not abbreviate anything and do not shorten sentances. Explain the image completely. +If you are provided with an image of a flow chart, describe the flow chart in detail. +If the image is mostly text, use OCR to extract the text as it is displayed in the image.""", + }, + { + "role": "user", + "content": [ + { + "text": "Describe this image in detail. Limit the response to 500 words.", + "type": "text", + }, + {"image_url": ANY, "type": "image_url"}, + ], + }, + ], + "model": app_config.get("AZURE_OPENAI_VISION_MODEL"), + }, + headers={ + "Accept": "application/json", + "Content-Type": "application/json", + "Authorization": f"Bearer {app_config.get('AZURE_OPENAI_API_KEY')}", + "Api-Key": app_config.get("AZURE_OPENAI_API_KEY"), + }, + query_string="api-version=2024-02-01", + times=1, + ), + )[0] + + assert request.get_json()["messages"][1]["content"][1]["image_url"].startswith( + f"{app_config.get('AZURE_STORAGE_ACCOUNT_ENDPOINT')}{app_config.get('AZURE_BLOB_CONTAINER_NAME')}/{FILE_NAME}" + ) + + +def test_embeddings_generated_for_caption( + message: QueueMessage, httpserver: HTTPServer, app_config: AppConfig +): + # when + batch_push_results.build().get_user_function()(message) + + # then + verify_request_made( + mock_httpserver=httpserver, + request_matcher=RequestMatcher( + path=f"/openai/deployments/{app_config.get('AZURE_OPENAI_EMBEDDING_MODEL')}/embeddings", + method="POST", + json={ + "input": ["This is a caption for the image"], + "model": app_config.get("AZURE_OPENAI_EMBEDDING_MODEL"), + "encoding_format": "base64", + }, + headers={ + "Accept": "application/json", + "Content-Type": "application/json", + "Authorization": f"Bearer {app_config.get('AZURE_OPENAI_API_KEY')}", + "Api-Key": app_config.get("AZURE_OPENAI_API_KEY"), + }, + query_string="api-version=2024-02-01", + times=1, + ), ) @@ -343,8 +426,11 @@ def test_makes_correct_call_to_store_documents_in_search_index( "value": [ { "id": expected_id, - "content": "", - "content_vector": [], + "content": "This is a caption for the image", + "content_vector": [ + 0.018990106880664825, + -0.0073809814639389515, + ], "image_vector": [1.0, 2.0, 3.0], "metadata": json.dumps( { diff --git 
a/code/tests/utilities/helpers/test_push_embedder.py b/code/tests/utilities/helpers/test_push_embedder.py index add8b23cd..c2206e84f 100644 --- a/code/tests/utilities/helpers/test_push_embedder.py +++ b/code/tests/utilities/helpers/test_push_embedder.py @@ -22,8 +22,13 @@ def llm_helper_mock(): llm_helper.get_embedding_model.return_value.embed_query.return_value = [ 0 ] * 1536 + mock_completion = llm_helper.get_chat_completion.return_value + choice = MagicMock() + choice.message.content = "This is a caption for an image" + mock_completion.choices = [choice] + llm_helper.generate_embeddings.return_value = [123] - yield mock + yield llm_helper @pytest.fixture(autouse=True) @@ -129,7 +134,46 @@ def test_embed_file_advanced_image_processing_vectorizes_image( ) +def test_embed_file_advanced_image_processing_uses_vision_model_for_captioning( + llm_helper_mock, +): + # given + env_helper_mock = MagicMock() + env_helper_mock.AZURE_OPENAI_VISION_MODEL = "gpt-4" + push_embedder = PushEmbedder(MagicMock(), env_helper_mock) + source_url = "http://localhost:8080/some-file-name.jpg" + + # when + push_embedder.embed_file(source_url, "some-file-name.jpg") + + # then + llm_helper_mock.get_chat_completion.assert_called_once_with( + [ + { + "role": "system", + "content": """You are a assistant that generates rich descriptions of images. +You need to be accurate in the information you extract and detailed in the descriptons you generate. +Do not abbreviate anything and do not shorten sentances. Explain the image completely. +If you are provided with an image of a flow chart, describe the flow chart in detail. +If the image is mostly text, use OCR to extract the text as it is displayed in the image.""", + }, + { + "role": "user", + "content": [ + { + "text": "Describe this image in detail. 
Limit the response to 500 words.", + "type": "text", + }, + {"image_url": source_url, "type": "image_url"}, + ], + }, + ], + env_helper_mock.AZURE_OPENAI_VISION_MODEL, + ) + + def test_embed_file_advanced_image_processing_stores_embeddings_in_search_index( + llm_helper_mock, azure_computer_vision_mock, azure_search_helper_mock: MagicMock, ): @@ -153,12 +197,16 @@ def test_embed_file_advanced_image_processing_stores_embeddings_in_search_index( hash_key = hashlib.sha1(f"{host_path}_1".encode("utf-8")).hexdigest() expected_id = f"doc_{hash_key}" + llm_helper_mock.generate_embeddings.assert_called_once_with( + "This is a caption for an image" + ) + azure_search_helper_mock.return_value.get_search_client.return_value.upload_documents.assert_called_once_with( [ { "id": expected_id, - "content": "", - "content_vector": [], + "content": "This is a caption for an image", + "content_vector": [123], "image_vector": image_embeddings, "metadata": json.dumps( { @@ -265,7 +313,7 @@ def test_embed_file_generates_embeddings_for_documents(llm_helper_mock): ) # then - llm_helper_mock.return_value.generate_embeddings.assert_has_calls( + llm_helper_mock.generate_embeddings.assert_has_calls( [call("some content"), call("some other content")] ) @@ -291,7 +339,7 @@ def test_embed_file_stores_documents_in_search_index( { "id": expected_chunked_documents[0].id, "content": expected_chunked_documents[0].content, - "content_vector": llm_helper_mock.return_value.generate_embeddings.return_value, + "content_vector": llm_helper_mock.generate_embeddings.return_value, "metadata": json.dumps( { "id": expected_chunked_documents[0].id, @@ -311,7 +359,7 @@ def test_embed_file_stores_documents_in_search_index( { "id": expected_chunked_documents[1].id, "content": expected_chunked_documents[1].content, - "content_vector": llm_helper_mock.return_value.generate_embeddings.return_value, + "content_vector": llm_helper_mock.generate_embeddings.return_value, "metadata": json.dumps( { "id": expected_chunked_documents[1].id, From 27b35d289bf368a0240c6d695d59495597c4a951 Mon Sep 17 00:00:00 2001 From: Chinedum Echeta <60179183+cecheta@users.noreply.github.com> Date: Thu, 16 May 2024 15:07:45 +0000 Subject: [PATCH 4/6] Grammar --- code/backend/batch/utilities/helpers/embedders/push_embedder.py | 2 +- .../tests/functions/test_advanced_image_processing.py | 2 +- code/tests/utilities/helpers/test_push_embedder.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/code/backend/batch/utilities/helpers/embedders/push_embedder.py b/code/backend/batch/utilities/helpers/embedders/push_embedder.py index a50e305c4..1d3473a8b 100644 --- a/code/backend/batch/utilities/helpers/embedders/push_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/push_embedder.py @@ -90,7 +90,7 @@ def __embed( def generate_image_caption(self, source_url): model = self.env_helper.AZURE_OPENAI_VISION_MODEL - caption_system_message = """You are a assistant that generates rich descriptions of images. + caption_system_message = """You are an assistant that generates rich descriptions of images. You need to be accurate in the information you extract and detailed in the descriptons you generate. Do not abbreviate anything and do not shorten sentances. Explain the image completely. If you are provided with an image of a flow chart, describe the flow chart in detail. 
diff --git a/code/tests/functional/tests/functions/test_advanced_image_processing.py b/code/tests/functional/tests/functions/test_advanced_image_processing.py index 875d5422f..fd41d6a3d 100644 --- a/code/tests/functional/tests/functions/test_advanced_image_processing.py +++ b/code/tests/functional/tests/functions/test_advanced_image_processing.py @@ -138,7 +138,7 @@ def test_image_passed_to_llm_to_generate_caption( "messages": [ { "role": "system", - "content": """You are a assistant that generates rich descriptions of images. + "content": """You are an assistant that generates rich descriptions of images. You need to be accurate in the information you extract and detailed in the descriptons you generate. Do not abbreviate anything and do not shorten sentances. Explain the image completely. If you are provided with an image of a flow chart, describe the flow chart in detail. diff --git a/code/tests/utilities/helpers/test_push_embedder.py b/code/tests/utilities/helpers/test_push_embedder.py index c2206e84f..fa5434067 100644 --- a/code/tests/utilities/helpers/test_push_embedder.py +++ b/code/tests/utilities/helpers/test_push_embedder.py @@ -151,7 +151,7 @@ def test_embed_file_advanced_image_processing_uses_vision_model_for_captioning( [ { "role": "system", - "content": """You are a assistant that generates rich descriptions of images. + "content": """You are an assistant that generates rich descriptions of images. You need to be accurate in the information you extract and detailed in the descriptons you generate. Do not abbreviate anything and do not shorten sentances. Explain the image completely. If you are provided with an image of a flow chart, describe the flow chart in detail. From 1d6df0094a57c630992d645efd40e74306019254 Mon Sep 17 00:00:00 2001 From: Chinedum Echeta <60179183+cecheta@users.noreply.github.com> Date: Thu, 16 May 2024 15:10:48 +0000 Subject: [PATCH 5/6] Remove llm_vision_model --- code/backend/batch/utilities/helpers/llm_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/code/backend/batch/utilities/helpers/llm_helper.py b/code/backend/batch/utilities/helpers/llm_helper.py index 685cf85b7..bbbe83e52 100644 --- a/code/backend/batch/utilities/helpers/llm_helper.py +++ b/code/backend/batch/utilities/helpers/llm_helper.py @@ -29,7 +29,6 @@ def __init__(self): ) self.llm_model = self.env_helper.AZURE_OPENAI_MODEL - self.llm_vision_model = self.env_helper.AZURE_OPENAI_VISION_MODEL self.llm_max_tokens = ( self.env_helper.AZURE_OPENAI_MAX_TOKENS if self.env_helper.AZURE_OPENAI_MAX_TOKENS != "" From 3af522725e2e450754b30609dc810d92e9c659a6 Mon Sep 17 00:00:00 2001 From: Chinedum Echeta <60179183+cecheta@users.noreply.github.com> Date: Thu, 16 May 2024 15:30:30 +0000 Subject: [PATCH 6/6] Make method private, remove "dangerous" default value --- .../batch/utilities/helpers/embedders/push_embedder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/code/backend/batch/utilities/helpers/embedders/push_embedder.py b/code/backend/batch/utilities/helpers/embedders/push_embedder.py index 1d3473a8b..58ba2f682 100644 --- a/code/backend/batch/utilities/helpers/embedders/push_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/push_embedder.py @@ -61,7 +61,7 @@ def __embed( ): logger.warning("Advanced image processing is not supported yet") - caption = self.generate_image_caption(source_url) + caption = self.__generate_image_caption(source_url) caption_vector = self.llm_helper.generate_embeddings(caption) image_vector = 
self.azure_computer_vision_client.vectorize_image(source_url) @@ -88,7 +88,7 @@ def __embed( logger.error("Failed to upload documents to search index") raise Exception(response) - def generate_image_caption(self, source_url): + def __generate_image_caption(self, source_url): model = self.env_helper.AZURE_OPENAI_VISION_MODEL caption_system_message = """You are an assistant that generates rich descriptions of images. You need to be accurate in the information you extract and detailed in the descriptons you generate. @@ -144,8 +144,8 @@ def __create_image_document( self, source_url: str, image_vector: List[float], - content: str = "", - content_vector: List[float] = [], + content: str, + content_vector: List[float], ): parsed_url = urlparse(source_url)
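
For reference while reviewing this series: a minimal sketch of the caption request that __generate_image_caption ends up issuing, and of how the caption feeds the new content/content_vector fields. This is not part of the patches. It assumes an AzureOpenAI client configured the way LLMHelper builds its openai_client; the endpoint, API key, blob URL, and the "text-embedding-ada-002" deployment name are placeholders, and the message layout simply mirrors the hunks above.

    from openai import AzureOpenAI

    # Placeholder configuration; in the app these values come from EnvHelper
    # (AZURE_OPENAI_VISION_MODEL, the embedding deployment, endpoint, key, ...).
    client = AzureOpenAI(
        azure_endpoint="https://<openai-resource>.openai.azure.com/",
        api_key="<api-key>",
        api_version="2024-02-01",
    )

    source_url = "https://<storage-account>.blob.core.windows.net/documents/some-image.jpg"

    caption_system_message = (
        "You are an assistant that generates rich descriptions of images. ..."
    )  # abbreviated here; the full prompt is the one added to push_embedder.py above

    # Same message shape as __generate_image_caption: a system prompt plus a user turn
    # containing a text instruction and the image URL, sent to the vision deployment.
    response = client.chat.completions.create(
        model="gpt-4",  # i.e. AZURE_OPENAI_VISION_MODEL
        messages=[
            {"role": "system", "content": caption_system_message},
            {
                "role": "user",
                "content": [
                    {
                        "text": "Describe this image in detail. Limit the response to 500 words.",
                        "type": "text",
                    },
                    {"image_url": source_url, "type": "image_url"},
                ],
            },
        ],
    )
    caption = response.choices[0].message.content

    # The caption is then embedded and stored in the search document alongside the
    # Computer Vision image vector (content / content_vector / image_vector).
    caption_vector = client.embeddings.create(
        model="text-embedding-ada-002",  # assumed embedding deployment name
        input=[caption],
    ).data[0].embedding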