From a62bbb662dd1c8814c4c61e4e9f8575031904118 Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Mon, 13 May 2024 17:11:59 +0100 Subject: [PATCH 01/20] feat: handle BlobDelete event type in `batch_push_results` --- code/backend/batch/BatchPushResults.py | 41 +++++++++++++------ code/backend/batch/function_app.py | 3 +- .../utilities/search/SearchHandlerBase.py | 27 +++++++++++- code/tests/test_BatchPushResults.py | 9 ++-- 4 files changed, 61 insertions(+), 19 deletions(-) diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index 18859f573..323b4ae95 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -7,14 +7,14 @@ from utilities.helpers.azure_blob_storage_client import AzureBlobStorageClient from utilities.helpers.env_helper import EnvHelper from utilities.helpers.embedders.embedder_factory import EmbedderFactory +from utilities.search.Search import Search bp_batch_push_results = func.Blueprint() logger = logging.getLogger(__name__) logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) -def _get_file_name_from_message(msg: func.QueueMessage) -> str: - message_body = json.loads(msg.get_body().decode("utf-8")) +def _get_file_name_from_message(message_body: any) -> str: return message_body.get( "filename", "/".join( @@ -27,21 +27,38 @@ def _get_file_name_from_message(msg: func.QueueMessage) -> str: arg_name="msg", queue_name="doc-processing", connection="AzureWebJobsStorage" ) def batch_push_results(msg: func.QueueMessage) -> None: - do_batch_push_results(msg) + message_body = json.loads(msg.get_body().decode("utf-8")) + logger.info("Process Document Event queue function triggered: %s", message_body) + + event_type = message_body.get("eventType", "") + # We handle "" in this scenario for backwards compatibility + # This function is primarily triggered by an Event Grid queue message from the blob storage + # However, it can also be triggered using a legacy schema from 
BatchStartProcessing + if event_type in ("", "Microsoft.Storage.BlobCreated"): + _process_document_created_event(message_body) + elif event_type == "Microsoft.Storage.BlobDeleted": + _process_document_deleted_event(message_body) -def do_batch_push_results(msg: func.QueueMessage) -> None: + else: + logger.error("Unknown event type received: %s", event_type) + raise NotImplementedError(f"Unknown event type received: {event_type}") + + +def _process_document_created_event(message_body: any) -> None: env_helper: EnvHelper = EnvHelper() - logger.info( - "Python queue trigger function processed a queue item: %s", - msg.get_body().decode("utf-8"), - ) blob_client = AzureBlobStorageClient() - # Get the file name from the message - file_name = _get_file_name_from_message(msg) - # Generate the SAS URL for the file + file_name = _get_file_name_from_message(message_body) file_sas = blob_client.get_blob_sas(file_name) - # Process the file + embedder = EmbedderFactory.create(env_helper) embedder.embed_file(file_sas, file_name) + + +def _process_document_deleted_event(message_body: any) -> None: + env_helper: EnvHelper = EnvHelper() + search_handler = Search.get_search_handler(env_helper) + + blob_url = message_body.get("data", {}).get("url", "") + search_handler.delete_by_source(f"{blob_url}_SAS_TOKEN_PLACEHOLDER_") diff --git a/code/backend/batch/function_app.py b/code/backend/batch/function_app.py index f26c6f706..d8fd6e602 100644 --- a/code/backend/batch/function_app.py +++ b/code/backend/batch/function_app.py @@ -10,7 +10,8 @@ logging.captureWarnings(True) # Raising the azure log level to WARN as it is too verbose - https://github.com/Azure/azure-sdk-for-python/issues/9422 logging.getLogger("azure").setLevel(os.environ.get("LOGLEVEL_AZURE", "WARN").upper()) -configure_azure_monitor() +if os.getenv("APPLICATIONINSIGHTS_ENABLED", "false").lower() == "true": + configure_azure_monitor() app = func.FunctionApp( http_auth_level=func.AuthLevel.FUNCTION diff --git 
a/code/backend/batch/utilities/search/SearchHandlerBase.py b/code/backend/batch/utilities/search/SearchHandlerBase.py index b1dba8e96..fd62c096b 100644 --- a/code/backend/batch/utilities/search/SearchHandlerBase.py +++ b/code/backend/batch/utilities/search/SearchHandlerBase.py @@ -36,13 +36,36 @@ def get_files(self): pass @abstractmethod - def output_results(self, results, id_field): + def output_results(self, results): pass @abstractmethod - def delete_files(self, files, id_field): + def delete_files(self, files): pass @abstractmethod def query_search(self, question) -> list[SourceDocument]: pass + + def delete_by_source(self, source) -> None: + if source is None: + return + + documents = self._get_documents_by_source(source) + if documents is None: + return + + results = self.output_results(documents) + files_to_dete = {filename: ids for filename, ids in results.items()} + self.delete_files(files_to_dete) + + def _get_documents_by_source(self, source): + if source is None: + return None + + return self.search_client.search( + "*", + select="id, title", + include_total_count=True, + filter=f"source eq '{source}'", + ) diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py index 0bd566f0a..cb79d23ea 100644 --- a/code/tests/test_BatchPushResults.py +++ b/code/tests/test_BatchPushResults.py @@ -1,3 +1,4 @@ +import json import sys import os import pytest @@ -24,8 +25,8 @@ def test_get_file_name_from_message(): mock_queue_message = QueueMessage( body='{"message": "test message", "filename": "test_filename.md"}' ) - - file_name = _get_file_name_from_message(mock_queue_message) + message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + file_name = _get_file_name_from_message(message_body) assert file_name == "test_filename.md" @@ -34,8 +35,8 @@ def test_get_file_name_from_message_no_filename(): mock_queue_message = QueueMessage( body='{"data": { "url": "test/test/test_filename.md"} }' ) - - file_name = 
_get_file_name_from_message(mock_queue_message) + message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + file_name = _get_file_name_from_message(message_body) assert file_name == "test_filename.md" From 4740dacb14aedd7d7e5c2865b59886c745055763 Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Mon, 13 May 2024 19:19:59 +0100 Subject: [PATCH 02/20] tests: initial tests for batch_push_results control logic --- code/tests/test_BatchPushResults.py | 53 ++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py index cb79d23ea..d1ebdd5f6 100644 --- a/code/tests/test_BatchPushResults.py +++ b/code/tests/test_BatchPushResults.py @@ -41,10 +41,61 @@ def test_get_file_name_from_message_no_filename(): assert file_name == "test_filename.md" +def test_batch_push_results_with_unhandled_event_type(): + mock_queue_message = QueueMessage( + body='{"eventType": "Microsoft.Storage.BlobUpdated"}' + ) + + with pytest.raises(NotImplementedError): + batch_push_results.build().get_user_function()(mock_queue_message) + + +@patch("backend.batch.BatchPushResults._process_document_created_event") +def test_batch_push_results_with_blob_created_event( + mock_process_document_created_event, +): + mock_queue_message = QueueMessage( + body='{"eventType": "Microsoft.Storage.BlobCreated", "filename": "test/test/test_filename.md"}' + ) + + batch_push_results.build().get_user_function()(mock_queue_message) + + expected_message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + mock_process_document_created_event.assert_called_once_with(expected_message_body) + + +@patch("backend.batch.BatchPushResults._process_document_created_event") +def test_batch_push_results_with_no_event(mock_process_document_created_event): + mock_queue_message = QueueMessage( + body='{"data": { "url": "test/test/test_filename.md"} }' + ) + + 
batch_push_results.build().get_user_function()(mock_queue_message) + + expected_message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + mock_process_document_created_event.assert_called_once_with(expected_message_body) + + +@patch("backend.batch.BatchPushResults._process_document_deleted_event") +def test_batch_push_results_with_blob_deleted_event( + mock_process_document_deleted_event, +): + mock_queue_message = QueueMessage( + body='{"eventType": "Microsoft.Storage.BlobDeleted", "filename": "test/test/test_filename.md"}' + ) + + batch_push_results.build().get_user_function()(mock_queue_message) + + expected_message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + mock_process_document_deleted_event.assert_called_once_with(expected_message_body) + + @patch("backend.batch.BatchPushResults.EnvHelper") @patch("backend.batch.BatchPushResults.AzureBlobStorageClient") def test_batch_push_results( - mock_azure_blob_storage_client, mock_env_helper, get_processor_handler_mock + mock_azure_blob_storage_client, + mock_env_helper, + get_processor_handler_mock: batch_push_results, ): mock_queue_message = QueueMessage( body='{"message": "test message", "filename": "test/test/test_filename.md"}' From 5c28dc71119a1c57b6d51dba30125e7838b08ec0 Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Tue, 14 May 2024 08:40:35 +0100 Subject: [PATCH 03/20] tests: additional tests for batch push results --- code/tests/test_BatchPushResults.py | 32 ++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py index d1ebdd5f6..7a686895a 100644 --- a/code/tests/test_BatchPushResults.py +++ b/code/tests/test_BatchPushResults.py @@ -16,9 +16,11 @@ @pytest.fixture(autouse=True) def get_processor_handler_mock(): - with patch("backend.batch.BatchPushResults.EmbedderFactory.create") as mock: - processor_handler = mock.return_value - yield processor_handler + with 
patch("backend.batch.BatchPushResults.EmbedderFactory.create") as mock_create_embedder, \ + patch("backend.batch.BatchPushResults.Search.get_search_handler") as mock_get_search_handler: + processor_handler_create = mock_create_embedder.return_value + processor_handler_get_search_handler = mock_get_search_handler.return_value + yield processor_handler_create, processor_handler_get_search_handler def test_get_file_name_from_message(): @@ -92,19 +94,35 @@ def test_batch_push_results_with_blob_deleted_event( @patch("backend.batch.BatchPushResults.EnvHelper") @patch("backend.batch.BatchPushResults.AzureBlobStorageClient") -def test_batch_push_results( +def test_batch_push_results_with_blob_created_event_uses_embedder( mock_azure_blob_storage_client, mock_env_helper, - get_processor_handler_mock: batch_push_results, + get_processor_handler_mock, ): + mock_create_embedder, mock_get_search_handler = get_processor_handler_mock + mock_queue_message = QueueMessage( - body='{"message": "test message", "filename": "test/test/test_filename.md"}' + body='{"eventType": "Microsoft.Storage.BlobCreated", "filename": "test/test/test_filename.md"}' ) mock_blob_client_instance = mock_azure_blob_storage_client.return_value mock_blob_client_instance.get_blob_sas.return_value = "test_blob_sas" batch_push_results.build().get_user_function()(mock_queue_message) - get_processor_handler_mock.embed_file.assert_called_once_with( + mock_create_embedder.embed_file.assert_called_once_with( "test_blob_sas", "test/test/test_filename.md" ) + +@patch("backend.batch.BatchPushResults.EnvHelper") +def test_batch_push_results_with_blob_deleted_event_uses_search_to_delete_with_sas_appended( + mock_env_helper, + get_processor_handler_mock, +): + mock_create_embedder, mock_get_search_handler = get_processor_handler_mock + + mock_queue_message = QueueMessage( + body='{"eventType": "Microsoft.Storage.BlobDeleted", "data": { "url": "https://test.test/test/test_filename.pdf"}}' + ) + + 
batch_push_results.build().get_user_function()(mock_queue_message) + mock_get_search_handler.delete_by_source.assert_called_once_with("https://test.test/test/test_filename.pdf_SAS_TOKEN_PLACEHOLDER_") From 1afac888ca60520e1c4d951878f5ca9d64698a99 Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Tue, 14 May 2024 09:17:04 +0100 Subject: [PATCH 04/20] refactor: remove redundant error log --- code/backend/batch/BatchPushResults.py | 1 - 1 file changed, 1 deletion(-) diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index 323b4ae95..a7bd06199 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -41,7 +41,6 @@ def batch_push_results(msg: func.QueueMessage) -> None: _process_document_deleted_event(message_body) else: - logger.error("Unknown event type received: %s", event_type) raise NotImplementedError(f"Unknown event type received: {event_type}") From 5b4b15f6bb015c8af48b2c8111b4600a4c9b125c Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Tue, 14 May 2024 09:19:41 +0100 Subject: [PATCH 05/20] fix: update logger to debug --- code/backend/batch/BatchPushResults.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/BatchPushResults.py index a7bd06199..f1db00a60 100644 --- a/code/backend/batch/BatchPushResults.py +++ b/code/backend/batch/BatchPushResults.py @@ -28,7 +28,7 @@ def _get_file_name_from_message(message_body: any) -> str: ) def batch_push_results(msg: func.QueueMessage) -> None: message_body = json.loads(msg.get_body().decode("utf-8")) - logger.info("Process Document Event queue function triggered: %s", message_body) + logger.debug("Process Document Event queue function triggered: %s", message_body) event_type = message_body.get("eventType", "") # We handle "" in this scenario for backwards compatibility From 63026d8f4429736bd0fd488d0e3cd65f5f607c8d Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Thu, 16 
May 2024 10:11:57 +0100 Subject: [PATCH 06/20] refactor: remove redundant line in delete by source Co-authored-by: Arpit Gaur --- .vscode/settings.json | 1 + code/backend/batch/utilities/search/SearchHandlerBase.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index bb92a125e..7c03e4d62 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,6 +4,7 @@ "azureFunctions.scmDoBuildDuringDeployment": true, "azureFunctions.projectLanguage": "Python", "azureFunctions.projectRuntime": "~4", + "azureFunctions.pythonVenv": "${workspaceFolder}/.venv", "debug.internalConsoleOptions": "neverOpen", "azureFunctions.projectLanguageModel": 2, "files.insertFinalNewline": true, diff --git a/code/backend/batch/utilities/search/SearchHandlerBase.py b/code/backend/batch/utilities/search/SearchHandlerBase.py index fd62c096b..7262eef7e 100644 --- a/code/backend/batch/utilities/search/SearchHandlerBase.py +++ b/code/backend/batch/utilities/search/SearchHandlerBase.py @@ -55,9 +55,8 @@ def delete_by_source(self, source) -> None: if documents is None: return - results = self.output_results(documents) - files_to_dete = {filename: ids for filename, ids in results.items()} - self.delete_files(files_to_dete) + files_to_delete = self.output_results(documents) + self.delete_files(files_to_delete) def _get_documents_by_source(self, source): if source is None: From a220d391ebcce93269b4716c3d2f69b4631b16d6 Mon Sep 17 00:00:00 2001 From: Arpit Gaur Date: Tue, 14 May 2024 15:03:51 +0100 Subject: [PATCH 07/20] refactor: Make backend search, backend tools and remainder backend files PEP8 compliant (#899) --- code/backend/{Admin.py => admin.py} | 0 ...URLEmbeddings.py => add_url_embeddings.py} | 0 ...chPushResults.py => batch_push_results.py} | 0 ...rocessing.py => batch_start_processing.py} | 0 code/backend/batch/function_app.py | 8 +- ...sponse.py => get_conversation_response.py} | 0 .../utilities/common/{Answer.py => 
answer.py} | 2 +- .../{SourceDocument.py => source_document.py} | 0 .../document_chunking_base.py | 2 +- .../document_chunking/fixed_size_overlap.py | 2 +- .../utilities/document_chunking/layout.py | 2 +- .../batch/utilities/document_chunking/page.py | 2 +- .../utilities/document_chunking/paragraph.py | 2 +- .../document_loading/document_loading_base.py | 2 +- .../utilities/document_loading/layout.py | 2 +- .../batch/utilities/document_loading/read.py | 2 +- .../batch/utilities/document_loading/web.py | 2 +- .../document_loading/word_document.py | 2 +- .../integrated_vectorization_embedder.py | 8 +- .../helpers/embedders/push_embedder.py | 2 +- ...tasource.py => azure_search_datasource.py} | 0 ...reSearchIndex.py => azure_search_index.py} | 0 ...archIndexer.py => azure_search_indexer.py} | 0 ...chSkillset.py => azure_search_skillset.py} | 0 ...sationLogger.py => conversation_logger.py} | 0 .../orchestrator/lang_chain_agent.py | 8 +- .../orchestrator/open_ai_functions.py | 8 +- .../orchestrator/orchestrator_base.py | 4 +- .../utilities/orchestrator/semantic_kernel.py | 2 +- .../utilities/parser/output_parser_tool.py | 2 +- .../batch/utilities/parser/parser_base.py | 2 +- .../batch/utilities/plugins/chat_plugin.py | 6 +- .../plugins/post_answering_plugin.py | 4 +- ...archHandler.py => azure_search_handler.py} | 4 +- ...ntegrated_vectorization_search_handler.py} | 4 +- .../utilities/search/{Search.py => search.py} | 6 +- ...hHandlerBase.py => search_handler_base.py} | 2 +- ...ssingBase.py => answer_processing_base.py} | 2 +- ...ringToolBase.py => answering_tool_base.py} | 2 +- ...tyChecker.py => content_safety_checker.py} | 4 +- ...{PostPromptTool.py => post_prompt_tool.py} | 2 +- ...nAnswerTool.py => question_answer_tool.py} | 8 +- ...cessingTool.py => text_processing_tool.py} | 4 +- code/backend/pages/02_Explore_Data.py | 2 +- code/backend/pages/03_Delete_Data.py | 2 +- code/tests/common/test_source_document.py | 4 +- .../test_advanced_image_processing.py | 2 +- 
...andler.py => test_azure_search_handler.py} | 14 +- ...ntegrated_vectorization_search_handler.py} | 10 +- .../{test_Search.py => test_search.py} | 10 +- code/tests/test_BatchPushResults.py | 128 ------------------ ...beddings.py => test_add_url_embeddings.py} | 18 +-- ...bStorage.py => test_azure_blob_storage.py} | 0 code/tests/test_batch_push_results.py | 58 ++++++++ ...sing.py => test_batch_start_processing.py} | 14 +- ...cker.py => test_content_safety_checker.py} | 2 +- ...rocessor.py => test_document_processor.py} | 0 ...e.py => test_get_conversation_response.py} | 10 +- ...rserTool.py => test_output_parser_tool.py} | 2 +- .../helpers/test_document_chunking_helper.py | 2 +- .../utilities/helpers/test_push_embedder.py | 2 +- .../test_azure_search_datasource.py | 10 +- .../test_azure_search_index.py | 8 +- .../test_azure_search_indexer.py | 8 +- .../test_azure_search_skillset.py | 6 +- .../orchestrator/test_lang_chain_agent.py | 2 +- .../orchestrator/test_semantic_kernel.py | 2 +- .../utilities/plugins/test_chat_plugin.py | 2 +- .../plugins/test_post_answering_plugin.py | 2 +- .../utilities/test_question_answer_tool.py | 22 +-- 70 files changed, 193 insertions(+), 263 deletions(-) rename code/backend/{Admin.py => admin.py} (100%) rename code/backend/batch/{AddURLEmbeddings.py => add_url_embeddings.py} (100%) rename code/backend/batch/{BatchPushResults.py => batch_push_results.py} (100%) rename code/backend/batch/{BatchStartProcessing.py => batch_start_processing.py} (100%) rename code/backend/batch/{GetConversationResponse.py => get_conversation_response.py} (100%) rename code/backend/batch/utilities/common/{Answer.py => answer.py} (97%) rename code/backend/batch/utilities/common/{SourceDocument.py => source_document.py} (100%) rename code/backend/batch/utilities/integrated_vectorization/{AzureSearchDatasource.py => azure_search_datasource.py} (100%) rename code/backend/batch/utilities/integrated_vectorization/{AzureSearchIndex.py => azure_search_index.py} 
(100%) rename code/backend/batch/utilities/integrated_vectorization/{AzureSearchIndexer.py => azure_search_indexer.py} (100%) rename code/backend/batch/utilities/integrated_vectorization/{AzureSearchSkillset.py => azure_search_skillset.py} (100%) rename code/backend/batch/utilities/loggers/{ConversationLogger.py => conversation_logger.py} (100%) rename code/backend/batch/utilities/search/{AzureSearchHandler.py => azure_search_handler.py} (97%) rename code/backend/batch/utilities/search/{IntegratedVectorizationSearchHandler.py => integrated_vectorization_search_handler.py} (97%) rename code/backend/batch/utilities/search/{Search.py => search.py} (74%) rename code/backend/batch/utilities/search/{SearchHandlerBase.py => search_handler_base.py} (97%) rename code/backend/batch/utilities/tools/{AnswerProcessingBase.py => answer_processing_base.py} (88%) rename code/backend/batch/utilities/tools/{AnsweringToolBase.py => answering_tool_base.py} (89%) rename code/backend/batch/utilities/tools/{ContentSafetyChecker.py => content_safety_checker.py} (97%) rename code/backend/batch/utilities/tools/{PostPromptTool.py => post_prompt_tool.py} (98%) rename code/backend/batch/utilities/tools/{QuestionAnswerTool.py => question_answer_tool.py} (96%) rename code/backend/batch/utilities/tools/{TextProcessingTool.py => text_processing_tool.py} (92%) rename code/tests/search_utilities/{test_AzureSearchHandler.py => test_azure_search_handler.py} (91%) rename code/tests/search_utilities/{test_IntegratedVectorizationSearchHandler.py => test_integrated_vectorization_search_handler.py} (92%) rename code/tests/search_utilities/{test_Search.py => test_search.py} (90%) delete mode 100644 code/tests/test_BatchPushResults.py rename code/tests/{test_AddURLEmbeddings.py => test_add_url_embeddings.py} (87%) rename code/tests/{test_AzureBlobStorage.py => test_azure_blob_storage.py} (100%) create mode 100644 code/tests/test_batch_push_results.py rename code/tests/{test_BatchStartProcessing.py => 
test_batch_start_processing.py} (84%) rename code/tests/{test_ContentSafetyChecker.py => test_content_safety_checker.py} (87%) rename code/tests/{test_DocumentProcessor.py => test_document_processor.py} (100%) rename code/tests/{test_GetConversationResponse.py => test_get_conversation_response.py} (86%) rename code/tests/{test_OutputParserTool.py => test_output_parser_tool.py} (98%) diff --git a/code/backend/Admin.py b/code/backend/admin.py similarity index 100% rename from code/backend/Admin.py rename to code/backend/admin.py diff --git a/code/backend/batch/AddURLEmbeddings.py b/code/backend/batch/add_url_embeddings.py similarity index 100% rename from code/backend/batch/AddURLEmbeddings.py rename to code/backend/batch/add_url_embeddings.py diff --git a/code/backend/batch/BatchPushResults.py b/code/backend/batch/batch_push_results.py similarity index 100% rename from code/backend/batch/BatchPushResults.py rename to code/backend/batch/batch_push_results.py diff --git a/code/backend/batch/BatchStartProcessing.py b/code/backend/batch/batch_start_processing.py similarity index 100% rename from code/backend/batch/BatchStartProcessing.py rename to code/backend/batch/batch_start_processing.py diff --git a/code/backend/batch/function_app.py b/code/backend/batch/function_app.py index d8fd6e602..b2756c751 100644 --- a/code/backend/batch/function_app.py +++ b/code/backend/batch/function_app.py @@ -1,10 +1,10 @@ import logging import os import azure.functions as func -from AddURLEmbeddings import bp_add_url_embeddings -from BatchPushResults import bp_batch_push_results -from BatchStartProcessing import bp_batch_start_processing -from GetConversationResponse import bp_get_conversation_response +from add_url_embeddings import bp_add_url_embeddings +from batch_push_results import bp_batch_push_results +from batch_start_processing import bp_batch_start_processing +from get_conversation_response import bp_get_conversation_response from azure.monitor.opentelemetry import 
configure_azure_monitor logging.captureWarnings(True) diff --git a/code/backend/batch/GetConversationResponse.py b/code/backend/batch/get_conversation_response.py similarity index 100% rename from code/backend/batch/GetConversationResponse.py rename to code/backend/batch/get_conversation_response.py diff --git a/code/backend/batch/utilities/common/Answer.py b/code/backend/batch/utilities/common/answer.py similarity index 97% rename from code/backend/batch/utilities/common/Answer.py rename to code/backend/batch/utilities/common/answer.py index 33222c64e..19e56fecd 100644 --- a/code/backend/batch/utilities/common/Answer.py +++ b/code/backend/batch/utilities/common/answer.py @@ -1,6 +1,6 @@ import json from typing import List, Optional -from .SourceDocument import SourceDocument +from .source_document import SourceDocument class Answer: diff --git a/code/backend/batch/utilities/common/SourceDocument.py b/code/backend/batch/utilities/common/source_document.py similarity index 100% rename from code/backend/batch/utilities/common/SourceDocument.py rename to code/backend/batch/utilities/common/source_document.py diff --git a/code/backend/batch/utilities/document_chunking/document_chunking_base.py b/code/backend/batch/utilities/document_chunking/document_chunking_base.py index 178f89769..9b28d8fa7 100644 --- a/code/backend/batch/utilities/document_chunking/document_chunking_base.py +++ b/code/backend/batch/utilities/document_chunking/document_chunking_base.py @@ -1,7 +1,7 @@ # Create an abstract class for document loading from typing import List from abc import ABC, abstractmethod -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument from .chunking_strategy import ChunkingSettings diff --git a/code/backend/batch/utilities/document_chunking/fixed_size_overlap.py b/code/backend/batch/utilities/document_chunking/fixed_size_overlap.py index a6341b6e0..4baa83d2a 100644 --- 
a/code/backend/batch/utilities/document_chunking/fixed_size_overlap.py +++ b/code/backend/batch/utilities/document_chunking/fixed_size_overlap.py @@ -2,7 +2,7 @@ from .document_chunking_base import DocumentChunkingBase from langchain.text_splitter import TokenTextSplitter from .chunking_strategy import ChunkingSettings -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class FixedSizeOverlapDocumentChunking(DocumentChunkingBase): diff --git a/code/backend/batch/utilities/document_chunking/layout.py b/code/backend/batch/utilities/document_chunking/layout.py index 1c07df86a..dd90f84d5 100644 --- a/code/backend/batch/utilities/document_chunking/layout.py +++ b/code/backend/batch/utilities/document_chunking/layout.py @@ -2,7 +2,7 @@ from .document_chunking_base import DocumentChunkingBase from langchain.text_splitter import MarkdownTextSplitter from .chunking_strategy import ChunkingSettings -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class LayoutDocumentChunking(DocumentChunkingBase): diff --git a/code/backend/batch/utilities/document_chunking/page.py b/code/backend/batch/utilities/document_chunking/page.py index ab52ae90a..012cffcea 100644 --- a/code/backend/batch/utilities/document_chunking/page.py +++ b/code/backend/batch/utilities/document_chunking/page.py @@ -2,7 +2,7 @@ from .document_chunking_base import DocumentChunkingBase from langchain.text_splitter import MarkdownTextSplitter from .chunking_strategy import ChunkingSettings -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class PageDocumentChunking(DocumentChunkingBase): diff --git a/code/backend/batch/utilities/document_chunking/paragraph.py b/code/backend/batch/utilities/document_chunking/paragraph.py index 2499cda8b..e62f2d146 100644 --- a/code/backend/batch/utilities/document_chunking/paragraph.py +++ 
b/code/backend/batch/utilities/document_chunking/paragraph.py @@ -1,7 +1,7 @@ from typing import List from .document_chunking_base import DocumentChunkingBase from .chunking_strategy import ChunkingSettings -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class ParagraphDocumentChunking(DocumentChunkingBase): diff --git a/code/backend/batch/utilities/document_loading/document_loading_base.py b/code/backend/batch/utilities/document_loading/document_loading_base.py index 1309a0383..6a9090be3 100644 --- a/code/backend/batch/utilities/document_loading/document_loading_base.py +++ b/code/backend/batch/utilities/document_loading/document_loading_base.py @@ -1,7 +1,7 @@ # Create an abstract class for document loading from typing import List from abc import ABC, abstractmethod -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class DocumentLoadingBase(ABC): diff --git a/code/backend/batch/utilities/document_loading/layout.py b/code/backend/batch/utilities/document_loading/layout.py index 752cb7ff4..c89f1e5ba 100644 --- a/code/backend/batch/utilities/document_loading/layout.py +++ b/code/backend/batch/utilities/document_loading/layout.py @@ -1,7 +1,7 @@ from typing import List from .document_loading_base import DocumentLoadingBase from ..helpers.azure_form_recognizer_helper import AzureFormRecognizerClient -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class LayoutDocumentLoading(DocumentLoadingBase): diff --git a/code/backend/batch/utilities/document_loading/read.py b/code/backend/batch/utilities/document_loading/read.py index 6a88d53f8..470c6afde 100644 --- a/code/backend/batch/utilities/document_loading/read.py +++ b/code/backend/batch/utilities/document_loading/read.py @@ -1,7 +1,7 @@ from typing import List from .document_loading_base import DocumentLoadingBase from 
..helpers.azure_form_recognizer_helper import AzureFormRecognizerClient -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class ReadDocumentLoading(DocumentLoadingBase): diff --git a/code/backend/batch/utilities/document_loading/web.py b/code/backend/batch/utilities/document_loading/web.py index 14f9af44d..f0c8fa631 100644 --- a/code/backend/batch/utilities/document_loading/web.py +++ b/code/backend/batch/utilities/document_loading/web.py @@ -3,7 +3,7 @@ from langchain.docstore.document import Document from langchain_community.document_loaders import WebBaseLoader from .document_loading_base import DocumentLoadingBase -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class WebDocumentLoading(DocumentLoadingBase): diff --git a/code/backend/batch/utilities/document_loading/word_document.py b/code/backend/batch/utilities/document_loading/word_document.py index c55fddd06..6e59293ab 100644 --- a/code/backend/batch/utilities/document_loading/word_document.py +++ b/code/backend/batch/utilities/document_loading/word_document.py @@ -3,7 +3,7 @@ from docx import Document import requests from .document_loading_base import DocumentLoadingBase -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class WordDocumentLoading(DocumentLoadingBase): diff --git a/code/backend/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py b/code/backend/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py index 1e8b35253..0e74a83e8 100644 --- a/code/backend/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py @@ -1,10 +1,10 @@ from .embedder_base import EmbedderBase from ..env_helper import EnvHelper from ..llm_helper import LLMHelper -from ...integrated_vectorization.AzureSearchIndex import 
AzureSearchIndex -from ...integrated_vectorization.AzureSearchIndexer import AzureSearchIndexer -from ...integrated_vectorization.AzureSearchDatasource import AzureSearchDatasource -from ...integrated_vectorization.AzureSearchSkillset import AzureSearchSkillset +from ...integrated_vectorization.azure_search_index import AzureSearchIndex +from ...integrated_vectorization.azure_search_indexer import AzureSearchIndexer +from ...integrated_vectorization.azure_search_datasource import AzureSearchDatasource +from ...integrated_vectorization.azure_search_skillset import AzureSearchSkillset from ..config.config_helper import ConfigHelper import logging diff --git a/code/backend/batch/utilities/helpers/embedders/push_embedder.py b/code/backend/batch/utilities/helpers/embedders/push_embedder.py index 342a5633b..9f793e150 100644 --- a/code/backend/batch/utilities/helpers/embedders/push_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/push_embedder.py @@ -13,7 +13,7 @@ from ..azure_search_helper import AzureSearchHelper from ..document_loading_helper import DocumentLoading from ..document_chunking_helper import DocumentChunking -from ...common.SourceDocument import SourceDocument +from ...common.source_document import SourceDocument logger = logging.getLogger(__name__) diff --git a/code/backend/batch/utilities/integrated_vectorization/AzureSearchDatasource.py b/code/backend/batch/utilities/integrated_vectorization/azure_search_datasource.py similarity index 100% rename from code/backend/batch/utilities/integrated_vectorization/AzureSearchDatasource.py rename to code/backend/batch/utilities/integrated_vectorization/azure_search_datasource.py diff --git a/code/backend/batch/utilities/integrated_vectorization/AzureSearchIndex.py b/code/backend/batch/utilities/integrated_vectorization/azure_search_index.py similarity index 100% rename from code/backend/batch/utilities/integrated_vectorization/AzureSearchIndex.py rename to 
code/backend/batch/utilities/integrated_vectorization/azure_search_index.py diff --git a/code/backend/batch/utilities/integrated_vectorization/AzureSearchIndexer.py b/code/backend/batch/utilities/integrated_vectorization/azure_search_indexer.py similarity index 100% rename from code/backend/batch/utilities/integrated_vectorization/AzureSearchIndexer.py rename to code/backend/batch/utilities/integrated_vectorization/azure_search_indexer.py diff --git a/code/backend/batch/utilities/integrated_vectorization/AzureSearchSkillset.py b/code/backend/batch/utilities/integrated_vectorization/azure_search_skillset.py similarity index 100% rename from code/backend/batch/utilities/integrated_vectorization/AzureSearchSkillset.py rename to code/backend/batch/utilities/integrated_vectorization/azure_search_skillset.py diff --git a/code/backend/batch/utilities/loggers/ConversationLogger.py b/code/backend/batch/utilities/loggers/conversation_logger.py similarity index 100% rename from code/backend/batch/utilities/loggers/ConversationLogger.py rename to code/backend/batch/utilities/loggers/conversation_logger.py diff --git a/code/backend/batch/utilities/orchestrator/lang_chain_agent.py b/code/backend/batch/utilities/orchestrator/lang_chain_agent.py index 66a469fcb..18774231e 100644 --- a/code/backend/batch/utilities/orchestrator/lang_chain_agent.py +++ b/code/backend/batch/utilities/orchestrator/lang_chain_agent.py @@ -8,10 +8,10 @@ from .orchestrator_base import OrchestratorBase from ..helpers.llm_helper import LLMHelper -from ..tools.PostPromptTool import PostPromptTool -from ..tools.QuestionAnswerTool import QuestionAnswerTool -from ..tools.TextProcessingTool import TextProcessingTool -from ..common.Answer import Answer +from ..tools.post_prompt_tool import PostPromptTool +from ..tools.question_answer_tool import QuestionAnswerTool +from ..tools.text_processing_tool import TextProcessingTool +from ..common.answer import Answer logger = logging.getLogger(__name__) diff --git 
a/code/backend/batch/utilities/orchestrator/open_ai_functions.py b/code/backend/batch/utilities/orchestrator/open_ai_functions.py index d46ecae31..fcd611184 100644 --- a/code/backend/batch/utilities/orchestrator/open_ai_functions.py +++ b/code/backend/batch/utilities/orchestrator/open_ai_functions.py @@ -4,10 +4,10 @@ from .orchestrator_base import OrchestratorBase from ..helpers.llm_helper import LLMHelper -from ..tools.PostPromptTool import PostPromptTool -from ..tools.QuestionAnswerTool import QuestionAnswerTool -from ..tools.TextProcessingTool import TextProcessingTool -from ..common.Answer import Answer +from ..tools.post_prompt_tool import PostPromptTool +from ..tools.question_answer_tool import QuestionAnswerTool +from ..tools.text_processing_tool import TextProcessingTool +from ..common.answer import Answer logger = logging.getLogger(__name__) diff --git a/code/backend/batch/utilities/orchestrator/orchestrator_base.py b/code/backend/batch/utilities/orchestrator/orchestrator_base.py index c514d2863..1073b9ec0 100644 --- a/code/backend/batch/utilities/orchestrator/orchestrator_base.py +++ b/code/backend/batch/utilities/orchestrator/orchestrator_base.py @@ -2,10 +2,10 @@ from uuid import uuid4 from typing import List, Optional from abc import ABC, abstractmethod -from ..loggers.ConversationLogger import ConversationLogger +from ..loggers.conversation_logger import ConversationLogger from ..helpers.config.config_helper import ConfigHelper from ..parser.output_parser_tool import OutputParserTool -from ..tools.ContentSafetyChecker import ContentSafetyChecker +from ..tools.content_safety_checker import ContentSafetyChecker logger = logging.getLogger(__name__) diff --git a/code/backend/batch/utilities/orchestrator/semantic_kernel.py b/code/backend/batch/utilities/orchestrator/semantic_kernel.py index 35b9fd27d..27b1b9fc5 100644 --- a/code/backend/batch/utilities/orchestrator/semantic_kernel.py +++ b/code/backend/batch/utilities/orchestrator/semantic_kernel.py @@ 
-7,7 +7,7 @@ from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.finish_reason import FinishReason -from ..common.Answer import Answer +from ..common.answer import Answer from ..helpers.llm_helper import LLMHelper from ..plugins.chat_plugin import ChatPlugin from ..plugins.post_answering_plugin import PostAnsweringPlugin diff --git a/code/backend/batch/utilities/parser/output_parser_tool.py b/code/backend/batch/utilities/parser/output_parser_tool.py index c26aaf82d..4455ac20b 100644 --- a/code/backend/batch/utilities/parser/output_parser_tool.py +++ b/code/backend/batch/utilities/parser/output_parser_tool.py @@ -3,7 +3,7 @@ import re import json from .parser_base import ParserBase -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument logger = logging.getLogger(__name__) diff --git a/code/backend/batch/utilities/parser/parser_base.py b/code/backend/batch/utilities/parser/parser_base.py index 1fb987d87..f9b703b3f 100644 --- a/code/backend/batch/utilities/parser/parser_base.py +++ b/code/backend/batch/utilities/parser/parser_base.py @@ -1,7 +1,7 @@ # Create an abstract class for parser from abc import ABC, abstractmethod from typing import List -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class ParserBase(ABC): diff --git a/code/backend/batch/utilities/plugins/chat_plugin.py b/code/backend/batch/utilities/plugins/chat_plugin.py index 2d672537a..a5bc5c2ef 100644 --- a/code/backend/batch/utilities/plugins/chat_plugin.py +++ b/code/backend/batch/utilities/plugins/chat_plugin.py @@ -2,9 +2,9 @@ from semantic_kernel.functions import kernel_function -from ..common.Answer import Answer -from ..tools.QuestionAnswerTool import QuestionAnswerTool -from ..tools.TextProcessingTool import TextProcessingTool +from ..common.answer import Answer +from ..tools.question_answer_tool import QuestionAnswerTool +from 
..tools.text_processing_tool import TextProcessingTool class ChatPlugin: diff --git a/code/backend/batch/utilities/plugins/post_answering_plugin.py b/code/backend/batch/utilities/plugins/post_answering_plugin.py index 1c5d87378..da613955f 100644 --- a/code/backend/batch/utilities/plugins/post_answering_plugin.py +++ b/code/backend/batch/utilities/plugins/post_answering_plugin.py @@ -1,8 +1,8 @@ from semantic_kernel.functions import kernel_function from semantic_kernel.functions.kernel_arguments import KernelArguments -from ..common.Answer import Answer -from ..tools.PostPromptTool import PostPromptTool +from ..common.answer import Answer +from ..tools.post_prompt_tool import PostPromptTool class PostAnsweringPlugin: diff --git a/code/backend/batch/utilities/search/AzureSearchHandler.py b/code/backend/batch/utilities/search/azure_search_handler.py similarity index 97% rename from code/backend/batch/utilities/search/AzureSearchHandler.py rename to code/backend/batch/utilities/search/azure_search_handler.py index 33052ebb8..65c330c60 100644 --- a/code/backend/batch/utilities/search/AzureSearchHandler.py +++ b/code/backend/batch/utilities/search/azure_search_handler.py @@ -1,8 +1,8 @@ from typing import List -from .SearchHandlerBase import SearchHandlerBase +from .search_handler_base import SearchHandlerBase from ..helpers.llm_helper import LLMHelper from ..helpers.azure_search_helper import AzureSearchHelper -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument import json from azure.search.documents.models import VectorizedQuery import tiktoken diff --git a/code/backend/batch/utilities/search/IntegratedVectorizationSearchHandler.py b/code/backend/batch/utilities/search/integrated_vectorization_search_handler.py similarity index 97% rename from code/backend/batch/utilities/search/IntegratedVectorizationSearchHandler.py rename to code/backend/batch/utilities/search/integrated_vectorization_search_handler.py index 
b3b501425..a85b6e44c 100644 --- a/code/backend/batch/utilities/search/IntegratedVectorizationSearchHandler.py +++ b/code/backend/batch/utilities/search/integrated_vectorization_search_handler.py @@ -1,11 +1,11 @@ from typing import List -from .SearchHandlerBase import SearchHandlerBase +from .search_handler_base import SearchHandlerBase from azure.search.documents import SearchClient from azure.search.documents.indexes import SearchIndexClient from azure.search.documents.models import VectorizableTextQuery from azure.core.credentials import AzureKeyCredential from azure.identity import DefaultAzureCredential -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument import re diff --git a/code/backend/batch/utilities/search/Search.py b/code/backend/batch/utilities/search/search.py similarity index 74% rename from code/backend/batch/utilities/search/Search.py rename to code/backend/batch/utilities/search/search.py index 6d16f3935..1c0a37789 100644 --- a/code/backend/batch/utilities/search/Search.py +++ b/code/backend/batch/utilities/search/search.py @@ -1,8 +1,8 @@ -from ..search.AzureSearchHandler import AzureSearchHandler -from ..search.IntegratedVectorizationSearchHandler import ( +from ..search.azure_search_handler import AzureSearchHandler +from ..search.integrated_vectorization_search_handler import ( IntegratedVectorizationSearchHandler, ) -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument from ..helpers.env_helper import EnvHelper diff --git a/code/backend/batch/utilities/search/SearchHandlerBase.py b/code/backend/batch/utilities/search/search_handler_base.py similarity index 97% rename from code/backend/batch/utilities/search/SearchHandlerBase.py rename to code/backend/batch/utilities/search/search_handler_base.py index 7262eef7e..5e3443e5c 100644 --- a/code/backend/batch/utilities/search/SearchHandlerBase.py +++ 
b/code/backend/batch/utilities/search/search_handler_base.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from ..helpers.env_helper import EnvHelper -from ..common.SourceDocument import SourceDocument +from ..common.source_document import SourceDocument class SearchHandlerBase(ABC): diff --git a/code/backend/batch/utilities/tools/AnswerProcessingBase.py b/code/backend/batch/utilities/tools/answer_processing_base.py similarity index 88% rename from code/backend/batch/utilities/tools/AnswerProcessingBase.py rename to code/backend/batch/utilities/tools/answer_processing_base.py index 040c00c4e..4b1b8a1d1 100644 --- a/code/backend/batch/utilities/tools/AnswerProcessingBase.py +++ b/code/backend/batch/utilities/tools/answer_processing_base.py @@ -1,6 +1,6 @@ # Create an abstract class for tool from abc import ABC, abstractmethod -from ..common.Answer import Answer +from ..common.answer import Answer class AnswerProcessingBase(ABC): diff --git a/code/backend/batch/utilities/tools/AnsweringToolBase.py b/code/backend/batch/utilities/tools/answering_tool_base.py similarity index 89% rename from code/backend/batch/utilities/tools/AnsweringToolBase.py rename to code/backend/batch/utilities/tools/answering_tool_base.py index a4f1fb2ac..235ef2bfd 100644 --- a/code/backend/batch/utilities/tools/AnsweringToolBase.py +++ b/code/backend/batch/utilities/tools/answering_tool_base.py @@ -1,7 +1,7 @@ # Create an abstract class for tool from abc import ABC, abstractmethod from typing import List -from ..common.Answer import Answer +from ..common.answer import Answer class AnsweringToolBase(ABC): diff --git a/code/backend/batch/utilities/tools/ContentSafetyChecker.py b/code/backend/batch/utilities/tools/content_safety_checker.py similarity index 97% rename from code/backend/batch/utilities/tools/ContentSafetyChecker.py rename to code/backend/batch/utilities/tools/content_safety_checker.py index 681531975..d04c77f23 100644 --- 
a/code/backend/batch/utilities/tools/ContentSafetyChecker.py +++ b/code/backend/batch/utilities/tools/content_safety_checker.py @@ -5,8 +5,8 @@ from azure.core.exceptions import HttpResponseError from azure.ai.contentsafety.models import AnalyzeTextOptions from ..helpers.env_helper import EnvHelper -from .AnswerProcessingBase import AnswerProcessingBase -from ..common.Answer import Answer +from .answer_processing_base import AnswerProcessingBase +from ..common.answer import Answer logger = logging.getLogger(__name__) diff --git a/code/backend/batch/utilities/tools/PostPromptTool.py b/code/backend/batch/utilities/tools/post_prompt_tool.py similarity index 98% rename from code/backend/batch/utilities/tools/PostPromptTool.py rename to code/backend/batch/utilities/tools/post_prompt_tool.py index ef13985d0..728cdeb52 100644 --- a/code/backend/batch/utilities/tools/PostPromptTool.py +++ b/code/backend/batch/utilities/tools/post_prompt_tool.py @@ -1,7 +1,7 @@ from langchain.chains.llm import LLMChain from langchain.prompts import PromptTemplate from langchain_community.callbacks import get_openai_callback -from ..common.Answer import Answer +from ..common.answer import Answer from ..helpers.llm_helper import LLMHelper from ..helpers.config.config_helper import ConfigHelper diff --git a/code/backend/batch/utilities/tools/QuestionAnswerTool.py b/code/backend/batch/utilities/tools/question_answer_tool.py similarity index 96% rename from code/backend/batch/utilities/tools/QuestionAnswerTool.py rename to code/backend/batch/utilities/tools/question_answer_tool.py index 37fef75c4..710f55c9d 100644 --- a/code/backend/batch/utilities/tools/QuestionAnswerTool.py +++ b/code/backend/batch/utilities/tools/question_answer_tool.py @@ -2,9 +2,9 @@ import logging import warnings -from ..common.SourceDocument import SourceDocument -from ..search.Search import Search -from .AnsweringToolBase import AnsweringToolBase +from ..common.source_document import SourceDocument +from ..search.search 
import Search +from .answering_tool_base import AnsweringToolBase from langchain.chains.llm import LLMChain from langchain.prompts import ( @@ -21,7 +21,7 @@ from ..helpers.config.config_helper import ConfigHelper from ..helpers.llm_helper import LLMHelper from ..helpers.env_helper import EnvHelper -from ..common.Answer import Answer +from ..common.answer import Answer logger = logging.getLogger(__name__) diff --git a/code/backend/batch/utilities/tools/TextProcessingTool.py b/code/backend/batch/utilities/tools/text_processing_tool.py similarity index 92% rename from code/backend/batch/utilities/tools/TextProcessingTool.py rename to code/backend/batch/utilities/tools/text_processing_tool.py index 1a0c0e33e..715418aae 100644 --- a/code/backend/batch/utilities/tools/TextProcessingTool.py +++ b/code/backend/batch/utilities/tools/text_processing_tool.py @@ -1,7 +1,7 @@ from typing import List from ..helpers.llm_helper import LLMHelper -from .AnsweringToolBase import AnsweringToolBase -from ..common.Answer import Answer +from .answering_tool_base import AnsweringToolBase +from ..common.answer import Answer class TextProcessingTool(AnsweringToolBase): diff --git a/code/backend/pages/02_Explore_Data.py b/code/backend/pages/02_Explore_Data.py index 593a2b476..60cc984b8 100644 --- a/code/backend/pages/02_Explore_Data.py +++ b/code/backend/pages/02_Explore_Data.py @@ -4,7 +4,7 @@ import sys import pandas as pd from batch.utilities.helpers.env_helper import EnvHelper -from batch.utilities.search.Search import Search +from batch.utilities.search.search import Search sys.path.append(os.path.join(os.path.dirname(__file__), "..")) env_helper: EnvHelper = EnvHelper() diff --git a/code/backend/pages/03_Delete_Data.py b/code/backend/pages/03_Delete_Data.py index f93a92aed..4eb3dd978 100644 --- a/code/backend/pages/03_Delete_Data.py +++ b/code/backend/pages/03_Delete_Data.py @@ -4,7 +4,7 @@ import sys import logging from batch.utilities.helpers.env_helper import EnvHelper -from 
batch.utilities.search.Search import Search +from batch.utilities.search.search import Search sys.path.append(os.path.join(os.path.dirname(__file__), "..")) env_helper: EnvHelper = EnvHelper() diff --git a/code/tests/common/test_source_document.py b/code/tests/common/test_source_document.py index f175e1953..8d6f8a63d 100644 --- a/code/tests/common/test_source_document.py +++ b/code/tests/common/test_source_document.py @@ -1,7 +1,7 @@ import hashlib from unittest.mock import patch from urllib.parse import urlparse -from backend.batch.utilities.common.SourceDocument import ( +from backend.batch.utilities.common.source_document import ( SourceDocument, SourceDocumentDecoder, SourceDocumentEncoder, @@ -27,7 +27,7 @@ def test_get_filename(): assert filename == "file" -@patch("backend.batch.utilities.common.SourceDocument.AzureBlobStorageClient") +@patch("backend.batch.utilities.common.source_document.AzureBlobStorageClient") def test_get_markdown_url(azure_blob_service_mock): # Given azure_blob_service_mock().get_container_sas.return_value = "_12345" diff --git a/code/tests/functional/tests/functions/test_advanced_image_processing.py b/code/tests/functional/tests/functions/test_advanced_image_processing.py index 3f5942dc9..42550f2a4 100644 --- a/code/tests/functional/tests/functions/test_advanced_image_processing.py +++ b/code/tests/functional/tests/functions/test_advanced_image_processing.py @@ -17,7 +17,7 @@ os.path.join(os.path.dirname(sys.path[0]), "..", "..", "backend", "batch") ) -from backend.batch.BatchPushResults import batch_push_results # noqa: E402 +from backend.batch.batch_push_results import batch_push_results # noqa: E402 pytestmark = pytest.mark.functional diff --git a/code/tests/search_utilities/test_AzureSearchHandler.py b/code/tests/search_utilities/test_azure_search_handler.py similarity index 91% rename from code/tests/search_utilities/test_AzureSearchHandler.py rename to code/tests/search_utilities/test_azure_search_handler.py index 
454c212c7..d4310e113 100644 --- a/code/tests/search_utilities/test_AzureSearchHandler.py +++ b/code/tests/search_utilities/test_azure_search_handler.py @@ -1,10 +1,10 @@ import pytest from unittest.mock import MagicMock, Mock, patch -from backend.batch.utilities.search.AzureSearchHandler import AzureSearchHandler +from backend.batch.utilities.search.azure_search_handler import AzureSearchHandler import json from azure.search.documents.models import VectorizedQuery -from backend.batch.utilities.common.SourceDocument import SourceDocument +from backend.batch.utilities.common.source_document import SourceDocument @pytest.fixture @@ -19,7 +19,7 @@ def env_helper_mock(): @pytest.fixture def mock_search_client(): with patch( - "backend.batch.utilities.search.AzureSearchHandler.AzureSearchHelper" + "backend.batch.utilities.search.azure_search_handler.AzureSearchHelper" ) as mock: search_client = mock.return_value.get_search_client.return_value yield search_client @@ -27,7 +27,7 @@ def mock_search_client(): @pytest.fixture def mock_llm_helper(): - with patch("backend.batch.utilities.search.AzureSearchHandler.LLMHelper") as mock: + with patch("backend.batch.utilities.search.azure_search_handler.LLMHelper") as mock: mock_llm_helper = mock.return_value yield mock_llm_helper @@ -35,11 +35,11 @@ def mock_llm_helper(): @pytest.fixture def handler(env_helper_mock, mock_search_client, mock_llm_helper): with patch( - "backend.batch.utilities.search.AzureSearchHandler.AzureSearchHelper", + "backend.batch.utilities.search.azure_search_handler.AzureSearchHelper", return_value=mock_search_client, ): with patch( - "backend.batch.utilities.search.AzureSearchHandler.LLMHelper", + "backend.batch.utilities.search.azure_search_handler.LLMHelper", return_value=mock_llm_helper, ): return AzureSearchHandler(env_helper_mock) @@ -126,7 +126,7 @@ def test_get_files(handler): ) -@patch("backend.batch.utilities.search.AzureSearchHandler.tiktoken") 
+@patch("backend.batch.utilities.search.azure_search_handler.tiktoken") def test_query_search_uses_tiktoken_encoder(mock_tiktoken, handler, mock_llm_helper): # given question = "What is the answer?" diff --git a/code/tests/search_utilities/test_IntegratedVectorizationSearchHandler.py b/code/tests/search_utilities/test_integrated_vectorization_search_handler.py similarity index 92% rename from code/tests/search_utilities/test_IntegratedVectorizationSearchHandler.py rename to code/tests/search_utilities/test_integrated_vectorization_search_handler.py index 3959f3a5b..c85e67107 100644 --- a/code/tests/search_utilities/test_IntegratedVectorizationSearchHandler.py +++ b/code/tests/search_utilities/test_integrated_vectorization_search_handler.py @@ -1,11 +1,11 @@ import pytest from unittest.mock import Mock, patch -from backend.batch.utilities.search.IntegratedVectorizationSearchHandler import ( +from backend.batch.utilities.search.integrated_vectorization_search_handler import ( IntegratedVectorizationSearchHandler, ) from azure.search.documents.models import VectorizableTextQuery -from backend.batch.utilities.common.SourceDocument import SourceDocument +from backend.batch.utilities.common.source_document import SourceDocument @pytest.fixture @@ -38,7 +38,7 @@ def search_index_does_not_exists_mock(): @pytest.fixture def search_client_mock(): with patch( - "backend.batch.utilities.search.IntegratedVectorizationSearchHandler.SearchClient" + "backend.batch.utilities.search.integrated_vectorization_search_handler.SearchClient" ) as mock: yield mock @@ -46,7 +46,7 @@ def search_client_mock(): @pytest.fixture def handler(env_helper_mock, search_client_mock, search_index_mock): with patch( - "backend.batch.utilities.search.IntegratedVectorizationSearchHandler.SearchClient", + "backend.batch.utilities.search.integrated_vectorization_search_handler.SearchClient", return_value=search_client_mock, ): return IntegratedVectorizationSearchHandler(env_helper_mock) @@ -57,7 +57,7 @@ 
def handler_index_does_not_exists( env_helper_mock, search_client_mock, search_index_does_not_exists_mock ): with patch( - "backend.batch.utilities.search.IntegratedVectorizationSearchHandler.SearchClient", + "backend.batch.utilities.search.integrated_vectorization_search_handler.SearchClient", return_value=search_client_mock, ): return IntegratedVectorizationSearchHandler(env_helper_mock) diff --git a/code/tests/search_utilities/test_Search.py b/code/tests/search_utilities/test_search.py similarity index 90% rename from code/tests/search_utilities/test_Search.py rename to code/tests/search_utilities/test_search.py index 4af9d88d2..05741fbaa 100644 --- a/code/tests/search_utilities/test_Search.py +++ b/code/tests/search_utilities/test_search.py @@ -1,10 +1,10 @@ import pytest from unittest.mock import Mock, MagicMock, patch -from backend.batch.utilities.search.Search import Search -from backend.batch.utilities.search.IntegratedVectorizationSearchHandler import ( +from backend.batch.utilities.search.search import Search +from backend.batch.utilities.search.integrated_vectorization_search_handler import ( IntegratedVectorizationSearchHandler, ) -from backend.batch.utilities.common.SourceDocument import SourceDocument +from backend.batch.utilities.common.source_document import SourceDocument @pytest.fixture @@ -21,7 +21,7 @@ def env_helper_mock(): @pytest.fixture(autouse=True) def iv_search_handler_mock(): with patch( - "backend.batch.utilities.search.IntegratedVectorizationSearchHandler" + "backend.batch.utilities.search.integrated_vectorization_search_handler" ) as mock: yield mock @@ -92,7 +92,7 @@ def test_get_source_documents_integrated_vectorization_no_results(env_helper_moc assert len(source_documents) == len(search_results) -@patch("backend.batch.utilities.search.Search") +@patch("backend.batch.utilities.search.search") def test_get_source_documents_azure_search(search_handler_mock: MagicMock): # given question = "example question" diff --git 
a/code/tests/test_BatchPushResults.py b/code/tests/test_BatchPushResults.py deleted file mode 100644 index 7a686895a..000000000 --- a/code/tests/test_BatchPushResults.py +++ /dev/null @@ -1,128 +0,0 @@ -import json -import sys -import os -import pytest -from unittest.mock import patch -from azure.functions import QueueMessage - - -sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) - -from backend.batch.BatchPushResults import ( # noqa: E402 - batch_push_results, - _get_file_name_from_message, -) - - -@pytest.fixture(autouse=True) -def get_processor_handler_mock(): - with patch("backend.batch.BatchPushResults.EmbedderFactory.create") as mock_create_embedder, \ - patch("backend.batch.BatchPushResults.Search.get_search_handler") as mock_get_search_handler: - processor_handler_create = mock_create_embedder.return_value - processor_handler_get_search_handler = mock_get_search_handler.return_value - yield processor_handler_create, processor_handler_get_search_handler - - -def test_get_file_name_from_message(): - mock_queue_message = QueueMessage( - body='{"message": "test message", "filename": "test_filename.md"}' - ) - message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) - file_name = _get_file_name_from_message(message_body) - - assert file_name == "test_filename.md" - - -def test_get_file_name_from_message_no_filename(): - mock_queue_message = QueueMessage( - body='{"data": { "url": "test/test/test_filename.md"} }' - ) - message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) - file_name = _get_file_name_from_message(message_body) - - assert file_name == "test_filename.md" - - -def test_batch_push_results_with_unhandled_event_type(): - mock_queue_message = QueueMessage( - body='{"eventType": "Microsoft.Storage.BlobUpdated"}' - ) - - with pytest.raises(NotImplementedError): - batch_push_results.build().get_user_function()(mock_queue_message) - - 
-@patch("backend.batch.BatchPushResults._process_document_created_event") -def test_batch_push_results_with_blob_created_event( - mock_process_document_created_event, -): - mock_queue_message = QueueMessage( - body='{"eventType": "Microsoft.Storage.BlobCreated", "filename": "test/test/test_filename.md"}' - ) - - batch_push_results.build().get_user_function()(mock_queue_message) - - expected_message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) - mock_process_document_created_event.assert_called_once_with(expected_message_body) - - -@patch("backend.batch.BatchPushResults._process_document_created_event") -def test_batch_push_results_with_no_event(mock_process_document_created_event): - mock_queue_message = QueueMessage( - body='{"data": { "url": "test/test/test_filename.md"} }' - ) - - batch_push_results.build().get_user_function()(mock_queue_message) - - expected_message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) - mock_process_document_created_event.assert_called_once_with(expected_message_body) - - -@patch("backend.batch.BatchPushResults._process_document_deleted_event") -def test_batch_push_results_with_blob_deleted_event( - mock_process_document_deleted_event, -): - mock_queue_message = QueueMessage( - body='{"eventType": "Microsoft.Storage.BlobDeleted", "filename": "test/test/test_filename.md"}' - ) - - batch_push_results.build().get_user_function()(mock_queue_message) - - expected_message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) - mock_process_document_deleted_event.assert_called_once_with(expected_message_body) - - -@patch("backend.batch.BatchPushResults.EnvHelper") -@patch("backend.batch.BatchPushResults.AzureBlobStorageClient") -def test_batch_push_results_with_blob_created_event_uses_embedder( - mock_azure_blob_storage_client, - mock_env_helper, - get_processor_handler_mock, -): - mock_create_embedder, mock_get_search_handler = get_processor_handler_mock - - mock_queue_message = 
QueueMessage( - body='{"eventType": "Microsoft.Storage.BlobCreated", "filename": "test/test/test_filename.md"}' - ) - - mock_blob_client_instance = mock_azure_blob_storage_client.return_value - mock_blob_client_instance.get_blob_sas.return_value = "test_blob_sas" - - batch_push_results.build().get_user_function()(mock_queue_message) - mock_create_embedder.embed_file.assert_called_once_with( - "test_blob_sas", "test/test/test_filename.md" - ) - -@patch("backend.batch.BatchPushResults.EnvHelper") -def test_batch_push_results_with_blob_deleted_event_uses_search_to_delete_with_sas_appended( - mock_env_helper, - get_processor_handler_mock, -): - mock_create_embedder, mock_get_search_handler = get_processor_handler_mock - - mock_queue_message = QueueMessage( - body='{"eventType": "Microsoft.Storage.BlobDeleted", "data": { "url": "https://test.test/test/test_filename.pdf"}}' - ) - - batch_push_results.build().get_user_function()(mock_queue_message) - mock_get_search_handler.delete_by_source.assert_called_once_with("https://test.test/test/test_filename.pdf_SAS_TOKEN_PLACEHOLDER_") diff --git a/code/tests/test_AddURLEmbeddings.py b/code/tests/test_add_url_embeddings.py similarity index 87% rename from code/tests/test_AddURLEmbeddings.py rename to code/tests/test_add_url_embeddings.py index 614c28604..2e7abb744 100644 --- a/code/tests/test_AddURLEmbeddings.py +++ b/code/tests/test_add_url_embeddings.py @@ -6,10 +6,10 @@ sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) -from backend.batch.AddURLEmbeddings import add_url_embeddings # noqa: E402 +from backend.batch.add_url_embeddings import add_url_embeddings # noqa: E402 -@patch("backend.batch.AddURLEmbeddings.EmbedderFactory") +@patch("backend.batch.add_url_embeddings.EmbedderFactory") def test_add_url_embeddings(mock_embedder_factory: MagicMock): # given fake_request = func.HttpRequest( @@ -46,7 +46,7 @@ def test_add_url_embeddings_returns_400_when_url_not_set(): assert response.status_code == 
400 -@patch("backend.batch.AddURLEmbeddings.EmbedderFactory") +@patch("backend.batch.add_url_embeddings.EmbedderFactory") def test_add_url_embeddings_returns_500_when_exception_occurs( mock_embedder_factory: MagicMock, ): @@ -71,9 +71,9 @@ def test_add_url_embeddings_returns_500_when_exception_occurs( ) -@patch("backend.batch.AddURLEmbeddings.EnvHelper") -@patch("backend.batch.AddURLEmbeddings.AzureBlobStorageClient") -@patch("backend.batch.AddURLEmbeddings.requests") +@patch("backend.batch.add_url_embeddings.EnvHelper") +@patch("backend.batch.add_url_embeddings.AzureBlobStorageClient") +@patch("backend.batch.add_url_embeddings.requests") def test_add_url_embeddings_integrated_vectorization( mock_requests: MagicMock, mock_blob_storage_client: MagicMock, @@ -105,9 +105,9 @@ def test_add_url_embeddings_integrated_vectorization( ) -@patch("backend.batch.AddURLEmbeddings.EnvHelper") -@patch("backend.batch.AddURLEmbeddings.AzureBlobStorageClient") -@patch("backend.batch.AddURLEmbeddings.requests") +@patch("backend.batch.add_url_embeddings.EnvHelper") +@patch("backend.batch.add_url_embeddings.AzureBlobStorageClient") +@patch("backend.batch.add_url_embeddings.requests") def test_add_url_embeddings_integrated_vectorization_returns_500_when_exception_occurs( mock_requests: MagicMock, mock_blob_storage_client: MagicMock, diff --git a/code/tests/test_AzureBlobStorage.py b/code/tests/test_azure_blob_storage.py similarity index 100% rename from code/tests/test_AzureBlobStorage.py rename to code/tests/test_azure_blob_storage.py diff --git a/code/tests/test_batch_push_results.py b/code/tests/test_batch_push_results.py new file mode 100644 index 000000000..b7c39c267 --- /dev/null +++ b/code/tests/test_batch_push_results.py @@ -0,0 +1,58 @@ +import sys +import os +import pytest +from unittest.mock import patch +from azure.functions import QueueMessage + + +sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) + +from backend.batch.batch_push_results import 
( # noqa: E402 + batch_push_results, + _get_file_name_from_message, +) + + +@pytest.fixture(autouse=True) +def get_processor_handler_mock(): + with patch("backend.batch.batch_push_results.EmbedderFactory.create") as mock: + processor_handler = mock.return_value + yield processor_handler + + +def test_get_file_name_from_message(): + mock_queue_message = QueueMessage( + body='{"message": "test message", "filename": "test_filename.md"}' + ) + + file_name = _get_file_name_from_message(mock_queue_message) + + assert file_name == "test_filename.md" + + +def test_get_file_name_from_message_no_filename(): + mock_queue_message = QueueMessage( + body='{"data": { "url": "test/test/test_filename.md"} }' + ) + + file_name = _get_file_name_from_message(mock_queue_message) + + assert file_name == "test_filename.md" + + +@patch("backend.batch.batch_push_results.EnvHelper") +@patch("backend.batch.batch_push_results.AzureBlobStorageClient") +def test_batch_push_results( + mock_azure_blob_storage_client, mock_env_helper, get_processor_handler_mock +): + mock_queue_message = QueueMessage( + body='{"message": "test message", "filename": "test/test/test_filename.md"}' + ) + + mock_blob_client_instance = mock_azure_blob_storage_client.return_value + mock_blob_client_instance.get_blob_sas.return_value = "test_blob_sas" + + batch_push_results.build().get_user_function()(mock_queue_message) + get_processor_handler_mock.embed_file.assert_called_once_with( + "test_blob_sas", "test/test/test_filename.md" + ) diff --git a/code/tests/test_BatchStartProcessing.py b/code/tests/test_batch_start_processing.py similarity index 84% rename from code/tests/test_BatchStartProcessing.py rename to code/tests/test_batch_start_processing.py index 0cfc757fe..9b3e5c3c6 100644 --- a/code/tests/test_BatchStartProcessing.py +++ b/code/tests/test_batch_start_processing.py @@ -5,12 +5,12 @@ sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) -from backend.batch.BatchStartProcessing 
import batch_start_processing # noqa: E402 +from backend.batch.batch_start_processing import batch_start_processing # noqa: E402 @pytest.fixture(autouse=True) def env_helper_mock(): - with patch("backend.batch.BatchStartProcessing.EnvHelper") as mock: + with patch("backend.batch.batch_start_processing.EnvHelper") as mock: env_helper = mock.return_value env_helper.AZURE_SEARCH_INDEXER_NAME = "AZURE_SEARCH_INDEXER_NAME" @@ -20,13 +20,13 @@ def env_helper_mock(): @pytest.fixture(autouse=True) def mock_integrated_vectorization_embedder(): with patch( - "backend.batch.BatchStartProcessing.IntegratedVectorizationEmbedder" + "backend.batch.batch_start_processing.IntegratedVectorizationEmbedder" ) as mock: yield mock -@patch("backend.batch.BatchStartProcessing.create_queue_client") -@patch("backend.batch.BatchStartProcessing.AzureBlobStorageClient") +@patch("backend.batch.batch_start_processing.create_queue_client") +@patch("backend.batch.batch_start_processing.AzureBlobStorageClient") def test_batch_start_processing_processes_all( mock_blob_storage_client, mock_create_queue_client, env_helper_mock ): @@ -54,8 +54,8 @@ def test_batch_start_processing_processes_all( assert send_message_calls[1] == call(b'{"filename": "file_name_two"}') -@patch("backend.batch.BatchStartProcessing.create_queue_client") -@patch("backend.batch.BatchStartProcessing.AzureBlobStorageClient") +@patch("backend.batch.batch_start_processing.create_queue_client") +@patch("backend.batch.batch_start_processing.AzureBlobStorageClient") def test_batch_start_processing_processes_all_integrated_vectorization( mock_blob_storage_client, mock_create_queue_client, diff --git a/code/tests/test_ContentSafetyChecker.py b/code/tests/test_content_safety_checker.py similarity index 87% rename from code/tests/test_ContentSafetyChecker.py rename to code/tests/test_content_safety_checker.py index 7aa4de603..68e039459 100644 --- a/code/tests/test_ContentSafetyChecker.py +++ b/code/tests/test_content_safety_checker.py @@ 
-1,5 +1,5 @@ import pytest -from backend.batch.utilities.tools.ContentSafetyChecker import ContentSafetyChecker +from backend.batch.utilities.tools.content_safety_checker import ContentSafetyChecker @pytest.mark.azure("This test requires Azure Content Safety configured") diff --git a/code/tests/test_DocumentProcessor.py b/code/tests/test_document_processor.py similarity index 100% rename from code/tests/test_DocumentProcessor.py rename to code/tests/test_document_processor.py diff --git a/code/tests/test_GetConversationResponse.py b/code/tests/test_get_conversation_response.py similarity index 86% rename from code/tests/test_GetConversationResponse.py rename to code/tests/test_get_conversation_response.py index a8f21d37e..63c76e8be 100644 --- a/code/tests/test_GetConversationResponse.py +++ b/code/tests/test_get_conversation_response.py @@ -6,13 +6,13 @@ sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "backend", "batch")) -from backend.batch.GetConversationResponse import ( # noqa: E402 +from backend.batch.get_conversation_response import ( # noqa: E402 get_conversation_response, ) -@patch("backend.batch.GetConversationResponse.ConfigHelper") -@patch("backend.batch.GetConversationResponse.Orchestrator") +@patch("backend.batch.get_conversation_response.ConfigHelper") +@patch("backend.batch.get_conversation_response.Orchestrator") @pytest.mark.asyncio async def test_get_conversation_response(mock_create_message_orchestrator, _): mock_http_request = Mock() @@ -53,8 +53,8 @@ async def test_get_conversation_response(mock_create_message_orchestrator, _): ] -@patch("backend.batch.GetConversationResponse.ConfigHelper") -@patch("backend.batch.GetConversationResponse.Orchestrator") +@patch("backend.batch.get_conversation_response.ConfigHelper") +@patch("backend.batch.get_conversation_response.Orchestrator") @pytest.mark.asyncio async def test_get_conversation_error(_, __): mock_http_request = Mock() diff --git a/code/tests/test_OutputParserTool.py 
b/code/tests/test_output_parser_tool.py similarity index 98% rename from code/tests/test_OutputParserTool.py rename to code/tests/test_output_parser_tool.py index 323b8d44d..a01e73ef7 100644 --- a/code/tests/test_OutputParserTool.py +++ b/code/tests/test_output_parser_tool.py @@ -3,7 +3,7 @@ from typing import List from backend.batch.utilities.parser.output_parser_tool import OutputParserTool -from backend.batch.utilities.common.SourceDocument import SourceDocument +from backend.batch.utilities.common.source_document import SourceDocument def test_returns_parsed_messages(): diff --git a/code/tests/utilities/helpers/test_document_chunking_helper.py b/code/tests/utilities/helpers/test_document_chunking_helper.py index a44b4af80..fd6a1541c 100644 --- a/code/tests/utilities/helpers/test_document_chunking_helper.py +++ b/code/tests/utilities/helpers/test_document_chunking_helper.py @@ -1,4 +1,4 @@ -from backend.batch.utilities.common.SourceDocument import SourceDocument +from backend.batch.utilities.common.source_document import SourceDocument from backend.batch.utilities.helpers.document_chunking_helper import DocumentChunking from backend.batch.utilities.document_chunking.chunking_strategy import ( ChunkingStrategy, diff --git a/code/tests/utilities/helpers/test_push_embedder.py b/code/tests/utilities/helpers/test_push_embedder.py index 21de29880..015382902 100644 --- a/code/tests/utilities/helpers/test_push_embedder.py +++ b/code/tests/utilities/helpers/test_push_embedder.py @@ -5,7 +5,7 @@ from backend.batch.utilities.document_chunking.chunking_strategy import ChunkingSettings from backend.batch.utilities.document_loading import LoadingSettings from backend.batch.utilities.document_loading.strategies import LoadingStrategy -from backend.batch.utilities.common.SourceDocument import SourceDocument +from backend.batch.utilities.common.source_document import SourceDocument from backend.batch.utilities.helpers.config.embedding_config import EmbeddingConfig 
CHUNKING_SETTINGS = ChunkingSettings({"strategy": "layout", "size": 1, "overlap": 0}) diff --git a/code/tests/utilities/integrated_vectorization/test_azure_search_datasource.py b/code/tests/utilities/integrated_vectorization/test_azure_search_datasource.py index f2cdd168f..2fb2cece3 100644 --- a/code/tests/utilities/integrated_vectorization/test_azure_search_datasource.py +++ b/code/tests/utilities/integrated_vectorization/test_azure_search_datasource.py @@ -1,6 +1,6 @@ import pytest from unittest.mock import MagicMock, patch -from backend.batch.utilities.integrated_vectorization.AzureSearchDatasource import ( +from backend.batch.utilities.integrated_vectorization.azure_search_datasource import ( AzureSearchDatasource, ) from azure.search.documents.indexes._generated.models import ( @@ -23,7 +23,7 @@ @pytest.fixture(autouse=True) def env_helper_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchDatasource.EnvHelper" + "backend.batch.utilities.integrated_vectorization.azure_search_datasource.EnvHelper" ) as mock: env_helper = mock.return_value env_helper.AZURE_AUTH_TYPE = AZURE_AUTH_TYPE @@ -40,7 +40,7 @@ def env_helper_mock(): @pytest.fixture(autouse=True) def search_indexer_client_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchDatasource.SearchIndexerClient" + "backend.batch.utilities.integrated_vectorization.azure_search_datasource.SearchIndexerClient" ) as mock: yield mock @@ -48,7 +48,7 @@ def search_indexer_client_mock(): @pytest.fixture(autouse=True) def search_indexer_data_container_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchDatasource.SearchIndexerDataContainer" + "backend.batch.utilities.integrated_vectorization.azure_search_datasource.SearchIndexerDataContainer" ) as mock: yield mock @@ -56,7 +56,7 @@ def search_indexer_data_container_mock(): @pytest.fixture(autouse=True) def search_indexer_datasource_connection_mock(): with patch( - 
"backend.batch.utilities.integrated_vectorization.AzureSearchDatasource.SearchIndexerDataSourceConnection" + "backend.batch.utilities.integrated_vectorization.azure_search_datasource.SearchIndexerDataSourceConnection" ) as mock: yield mock diff --git a/code/tests/utilities/integrated_vectorization/test_azure_search_index.py b/code/tests/utilities/integrated_vectorization/test_azure_search_index.py index 314564880..fbcc8a833 100644 --- a/code/tests/utilities/integrated_vectorization/test_azure_search_index.py +++ b/code/tests/utilities/integrated_vectorization/test_azure_search_index.py @@ -1,6 +1,6 @@ import pytest from unittest.mock import ANY, MagicMock, patch -from backend.batch.utilities.integrated_vectorization.AzureSearchIndex import ( +from backend.batch.utilities.integrated_vectorization.azure_search_index import ( AzureSearchIndex, ) from azure.search.documents.indexes.models import ( @@ -18,7 +18,7 @@ @pytest.fixture(autouse=True) def env_helper_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchIndex.EnvHelper" + "backend.batch.utilities.integrated_vectorization.azure_search_index.EnvHelper" ) as mock: env_helper = mock.return_value env_helper.AZURE_AUTH_TYPE = AZURE_AUTH_TYPE @@ -32,7 +32,7 @@ def env_helper_mock(): @pytest.fixture(autouse=True) def llm_helper_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchIndex.LLMHelper" + "backend.batch.utilities.integrated_vectorization.azure_search_index.LLMHelper" ) as mock: llm_helper = mock.return_value llm_helper.get_embedding_model.return_value.embed_query.return_value = [ @@ -45,7 +45,7 @@ def llm_helper_mock(): @pytest.fixture(autouse=True) def search_index_client_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchIndex.SearchIndexClient" + "backend.batch.utilities.integrated_vectorization.azure_search_index.SearchIndexClient" ) as mock: indexer_client = mock.return_value 
indexer_client.create_or_update_index.return_value = SearchIndex( diff --git a/code/tests/utilities/integrated_vectorization/test_azure_search_indexer.py b/code/tests/utilities/integrated_vectorization/test_azure_search_indexer.py index a656223ed..dae2e065f 100644 --- a/code/tests/utilities/integrated_vectorization/test_azure_search_indexer.py +++ b/code/tests/utilities/integrated_vectorization/test_azure_search_indexer.py @@ -1,6 +1,6 @@ import pytest from unittest.mock import ANY, MagicMock, patch -from backend.batch.utilities.integrated_vectorization.AzureSearchIndexer import ( +from backend.batch.utilities.integrated_vectorization.azure_search_indexer import ( AzureSearchIndexer, ) @@ -13,7 +13,7 @@ @pytest.fixture(autouse=True) def env_helper_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchIndexer.EnvHelper" + "backend.batch.utilities.integrated_vectorization.azure_search_indexer.EnvHelper" ) as mock: env_helper = mock.return_value env_helper.AZURE_AUTH_TYPE = AZURE_AUTH_TYPE @@ -27,7 +27,7 @@ def env_helper_mock(): @pytest.fixture(autouse=True) def search_indexer_client_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchIndexer.SearchIndexerClient" + "backend.batch.utilities.integrated_vectorization.azure_search_indexer.SearchIndexerClient" ) as mock: yield mock @@ -35,7 +35,7 @@ def search_indexer_client_mock(): @pytest.fixture(autouse=True) def search_indexer_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchIndexer.SearchIndexer" + "backend.batch.utilities.integrated_vectorization.azure_search_indexer.SearchIndexer" ) as mock: yield mock diff --git a/code/tests/utilities/integrated_vectorization/test_azure_search_skillset.py b/code/tests/utilities/integrated_vectorization/test_azure_search_skillset.py index 24a4c19df..19150b948 100644 --- a/code/tests/utilities/integrated_vectorization/test_azure_search_skillset.py +++ 
b/code/tests/utilities/integrated_vectorization/test_azure_search_skillset.py @@ -1,6 +1,6 @@ import pytest from unittest.mock import MagicMock, patch -from backend.batch.utilities.integrated_vectorization.AzureSearchSkillset import ( +from backend.batch.utilities.integrated_vectorization.azure_search_skillset import ( AzureSearchSkillset, ) from azure.search.documents.indexes.models import ( @@ -21,7 +21,7 @@ @pytest.fixture(autouse=True) def env_helper_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchSkillset.EnvHelper" + "backend.batch.utilities.integrated_vectorization.azure_search_skillset.EnvHelper" ) as mock: env_helper = mock.return_value env_helper.AZURE_AUTH_TYPE = AZURE_AUTH_TYPE @@ -37,7 +37,7 @@ def env_helper_mock(): @pytest.fixture(autouse=True) def search_indexer_client_mock(): with patch( - "backend.batch.utilities.integrated_vectorization.AzureSearchSkillset.SearchIndexerClient" + "backend.batch.utilities.integrated_vectorization.azure_search_skillset.SearchIndexerClient" ) as mock: indexer_client = mock.return_value indexer_client.create_or_update_skillset.return_value = SearchIndexerSkillset( diff --git a/code/tests/utilities/orchestrator/test_lang_chain_agent.py b/code/tests/utilities/orchestrator/test_lang_chain_agent.py index 8125d572d..0d3843479 100644 --- a/code/tests/utilities/orchestrator/test_lang_chain_agent.py +++ b/code/tests/utilities/orchestrator/test_lang_chain_agent.py @@ -2,7 +2,7 @@ import pytest from backend.batch.utilities.orchestrator.lang_chain_agent import LangChainAgent -from backend.batch.utilities.common.Answer import Answer +from backend.batch.utilities.common.answer import Answer class LangChainAgentNoInit(LangChainAgent): diff --git a/code/tests/utilities/orchestrator/test_semantic_kernel.py b/code/tests/utilities/orchestrator/test_semantic_kernel.py index 256084160..aeb9fd572 100644 --- a/code/tests/utilities/orchestrator/test_semantic_kernel.py +++ 
b/code/tests/utilities/orchestrator/test_semantic_kernel.py @@ -1,7 +1,7 @@ from unittest.mock import ANY, AsyncMock, MagicMock, call, patch import pytest -from backend.batch.utilities.common.Answer import Answer +from backend.batch.utilities.common.answer import Answer from backend.batch.utilities.orchestrator.semantic_kernel import ( SemanticKernelOrchestrator, ) diff --git a/code/tests/utilities/plugins/test_chat_plugin.py b/code/tests/utilities/plugins/test_chat_plugin.py index 19ebdccfb..333839efd 100644 --- a/code/tests/utilities/plugins/test_chat_plugin.py +++ b/code/tests/utilities/plugins/test_chat_plugin.py @@ -1,7 +1,7 @@ from unittest.mock import patch, MagicMock import pytest -from backend.batch.utilities.common.Answer import Answer +from backend.batch.utilities.common.answer import Answer from backend.batch.utilities.plugins.chat_plugin import ChatPlugin from semantic_kernel import Kernel diff --git a/code/tests/utilities/plugins/test_post_answering_plugin.py b/code/tests/utilities/plugins/test_post_answering_plugin.py index 55fc30fab..4f7ecaa28 100644 --- a/code/tests/utilities/plugins/test_post_answering_plugin.py +++ b/code/tests/utilities/plugins/test_post_answering_plugin.py @@ -1,7 +1,7 @@ from unittest.mock import patch, MagicMock import pytest -from backend.batch.utilities.common.Answer import Answer +from backend.batch.utilities.common.answer import Answer from backend.batch.utilities.plugins.post_answering_plugin import PostAnsweringPlugin from semantic_kernel import Kernel diff --git a/code/tests/utilities/test_question_answer_tool.py b/code/tests/utilities/test_question_answer_tool.py index 2fe8f5680..86e16d7f2 100644 --- a/code/tests/utilities/test_question_answer_tool.py +++ b/code/tests/utilities/test_question_answer_tool.py @@ -2,15 +2,15 @@ from unittest.mock import MagicMock, patch import pytest -from backend.batch.utilities.common.Answer import Answer -from backend.batch.utilities.tools.QuestionAnswerTool import QuestionAnswerTool 
+from backend.batch.utilities.common.answer import Answer +from backend.batch.utilities.tools.question_answer_tool import QuestionAnswerTool from langchain_core.documents import Document -from backend.batch.utilities.common.SourceDocument import SourceDocument +from backend.batch.utilities.common.source_document import SourceDocument @pytest.fixture(autouse=True) def config_mock(): - with patch("backend.batch.utilities.tools.QuestionAnswerTool.ConfigHelper") as mock: + with patch("backend.batch.utilities.tools.question_answer_tool.ConfigHelper") as mock: config = mock.get_active_config_or_default.return_value config.prompts.answering_system_prompt = "mock answering system prompt" config.prompts.answering_user_prompt = ( @@ -31,7 +31,7 @@ def config_mock(): @pytest.fixture(autouse=True) def env_helper_mock(): - with patch("backend.batch.utilities.tools.QuestionAnswerTool.EnvHelper") as mock: + with patch("backend.batch.utilities.tools.question_answer_tool.EnvHelper") as mock: env_helper = mock.return_value env_helper.AZURE_OPENAI_SYSTEM_MESSAGE = "mock azure openai system message" env_helper.AZURE_SEARCH_TOP_K = 1 @@ -43,13 +43,13 @@ def env_helper_mock(): @pytest.fixture(autouse=True) def LLMHelperMock(): - with patch("backend.batch.utilities.tools.QuestionAnswerTool.LLMHelper") as mock: + with patch("backend.batch.utilities.tools.question_answer_tool.LLMHelper") as mock: yield mock @pytest.fixture(autouse=True) def LLMChainMock(): - with patch("backend.batch.utilities.tools.QuestionAnswerTool.LLMChain") as mock: + with patch("backend.batch.utilities.tools.question_answer_tool.LLMChain") as mock: mock.return_value.return_value = {"text": "mock content"} yield mock @@ -58,7 +58,7 @@ def LLMChainMock(): @pytest.fixture(autouse=True) def get_openai_callback_mock(): with patch( - "backend.batch.utilities.tools.QuestionAnswerTool.get_openai_callback" + "backend.batch.utilities.tools.question_answer_tool.get_openai_callback" ) as mock: yield mock @@ -66,7 +66,7 @@ def 
get_openai_callback_mock(): @pytest.fixture(autouse=True) def get_search_handler_mock(): with patch( - "backend.batch.utilities.tools.QuestionAnswerTool.Search.get_search_handler" + "backend.batch.utilities.tools.question_answer_tool.Search.get_search_handler" ) as mock: search_handler = mock.return_value @@ -76,7 +76,7 @@ def get_search_handler_mock(): @pytest.fixture(autouse=True) def get_source_documents_mock(): with patch( - "backend.batch.utilities.tools.QuestionAnswerTool.Search.get_source_documents" + "backend.batch.utilities.tools.question_answer_tool.Search.get_source_documents" ) as mock: source_documents = mock.return_value yield source_documents @@ -85,7 +85,7 @@ def get_source_documents_mock(): @pytest.fixture(autouse=True) def get_source_documents_yield(): with patch( - "backend.batch.utilities.tools.QuestionAnswerTool.Search.get_source_documents" + "backend.batch.utilities.tools.question_answer_tool.Search.get_source_documents" ) as mock: documents = [ SourceDocument( From 13157997f598bcec17d06cff6b47194b56e36223 Mon Sep 17 00:00:00 2001 From: Chinedum Echeta <60179183+cecheta@users.noreply.github.com> Date: Wed, 15 May 2024 08:31:39 +0100 Subject: [PATCH 08/20] feat: Generate embeddings for images (#892) Co-authored-by: Adam Dougal Co-authored-by: Adam Dougal Co-authored-by: Arpit Gaur --- .../helpers/azure_computer_vision_client.py | 82 ++++++ .../utilities/helpers/config/config_helper.py | 28 +- .../helpers/embedders/embedder_factory.py | 2 +- .../helpers/embedders/push_embedder.py | 35 ++- .../batch/utilities/helpers/env_helper.py | 19 ++ code/tests/conftest.py | 35 +++ code/tests/constants.py | 5 + code/tests/functional/app_config.py | 1 + code/tests/functional/conftest.py | 50 +--- .../backend_api/default/test_azure_byod.py | 2 +- .../default/test_conversation_custom.py | 2 +- .../backend_api/default/test_speech_token.py | 2 +- ...est_response_with_search_documents_tool.py | 2 +- ...test_response_with_text_processing_tool.py | 2 +- 
.../test_response_without_tool_call.py | 2 +- .../test_azure_byod_without_data.py | 2 +- .../functional/tests/functions/conftest.py | 1 + .../test_advanced_image_processing.py | 42 ++- .../{functional => }/request_matching.py | 5 +- .../helpers/test_AzureComputerVisionClient.py | 244 ++++++++++++++++++ .../utilities/helpers/test_config_helper.py | 43 ++- .../utilities/helpers/test_env_helper.py | 1 + .../utilities/helpers/test_push_embedder.py | 67 ++++- infra/app/function.bicep | 3 + infra/app/storekeys.bicep | 11 + infra/main.bicep | 23 ++ infra/main.bicepparam | 4 + infra/main.json | 95 ++++++- 28 files changed, 730 insertions(+), 80 deletions(-) create mode 100644 code/backend/batch/utilities/helpers/azure_computer_vision_client.py create mode 100644 code/tests/constants.py rename code/tests/{functional => }/request_matching.py (94%) create mode 100644 code/tests/utilities/helpers/test_AzureComputerVisionClient.py diff --git a/code/backend/batch/utilities/helpers/azure_computer_vision_client.py b/code/backend/batch/utilities/helpers/azure_computer_vision_client.py new file mode 100644 index 000000000..c20b339eb --- /dev/null +++ b/code/backend/batch/utilities/helpers/azure_computer_vision_client.py @@ -0,0 +1,82 @@ +import logging +from typing import List +from urllib.parse import urljoin +from azure.identity import DefaultAzureCredential, get_bearer_token_provider + +import requests +from requests import Response + +from .env_helper import EnvHelper + +logger = logging.getLogger(__name__) + + +class AzureComputerVisionClient: + + __TOKEN_SCOPE = "https://cognitiveservices.azure.com/.default" + __VECTORIZE_IMAGE_PATH = "computervision/retrieval:vectorizeImage" + __RESPONSE_VECTOR_KEY = "vector" + + def __init__(self, env_helper: EnvHelper) -> None: + self.host = env_helper.AZURE_COMPUTER_VISION_ENDPOINT + self.timeout = env_helper.AZURE_COMPUTER_VISION_TIMEOUT + self.key = env_helper.AZURE_COMPUTER_VISION_KEY + self.use_keys = env_helper.is_auth_type_keys() + 
self.api_version = env_helper.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION + self.model_version = ( + env_helper.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION + ) + + def vectorize_image(self, image_url: str) -> List[float]: + logger.info(f"Making call to computer vision to vectorize image: {image_url}") + response = self.__make_request(image_url) + self.__validate_response(response) + + response_json = self.__get_json_body(response) + return self.__get_vectors(response_json) + + def __make_request(self, image_url: str) -> Response: + try: + headers = {} + if self.use_keys: + headers["Ocp-Apim-Subscription-Key"] = self.key + else: + token_provider = get_bearer_token_provider( + DefaultAzureCredential(), self.__TOKEN_SCOPE + ) + headers["Authorization"] = "Bearer " + token_provider() + + return requests.post( + url=urljoin(self.host, self.__VECTORIZE_IMAGE_PATH), + params={ + "api-version": self.api_version, + "model-version": self.model_version, + }, + json={"url": image_url}, + headers=headers, + timeout=self.timeout, + ) + except Exception as e: + raise Exception(f"Call to vectorize image failed: {image_url}") from e + + def __validate_response(self, response: Response): + if response.status_code != 200: + raise Exception( + f"Call to vectorize image failed with status: {response.status_code} body: {response.text}" + ) + + def __get_json_body(self, response: Response) -> dict: + try: + return response.json() + except Exception as e: + raise Exception( + f"Call to vectorize image returned malformed response body: {response.text}", + ) from e + + def __get_vectors(self, response_json: dict) -> List[float]: + if self.__RESPONSE_VECTOR_KEY in response_json: + return response_json[self.__RESPONSE_VECTOR_KEY] + else: + raise Exception( + f"Call to vectorize image returned no vector: {response_json}" + ) diff --git a/code/backend/batch/utilities/helpers/config/config_helper.py b/code/backend/batch/utilities/helpers/config/config_helper.py index 
5352f7b66..5bc1e0563 100644 --- a/code/backend/batch/utilities/helpers/config/config_helper.py +++ b/code/backend/batch/utilities/helpers/config/config_helper.py @@ -13,6 +13,7 @@ CONFIG_CONTAINER_NAME = "config" CONFIG_FILE_NAME = "active.json" +ADVANCED_IMAGE_PROCESSING_FILE_TYPES = ["jpeg", "jpg", "png", "tiff", "bmp"] logger = logging.getLogger(__name__) @@ -54,8 +55,8 @@ def __init__(self, config: dict): else None ) - def get_available_document_types(self): - document_types = [ + def get_available_document_types(self) -> list[str]: + document_types = { "txt", "pdf", "url", @@ -65,12 +66,15 @@ def get_available_document_types(self): "jpg", "png", "docx", - ] + } if self.env_helper.USE_ADVANCED_IMAGE_PROCESSING: - document_types.extend(["tiff", "bmp"]) + document_types.update(ADVANCED_IMAGE_PROCESSING_FILE_TYPES) return sorted(document_types) + def get_advanced_image_processing_image_types(self): + return ADVANCED_IMAGE_PROCESSING_FILE_TYPES + def get_available_chunking_strategies(self): return [c.value for c in ChunkingStrategy] @@ -180,6 +184,7 @@ def get_active_config_or_default(): @staticmethod def save_config_as_active(config): + ConfigHelper.validate_config(config) blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME) blob_client = blob_client.upload_file( json.dumps(config, indent=2), @@ -187,6 +192,21 @@ def save_config_as_active(config): content_type="application/json", ) + @staticmethod + def validate_config(config: dict): + for document_processor in config.get("document_processors"): + document_type = document_processor.get("document_type") + unsupported_advanced_image_processing_file_type = ( + document_type not in ADVANCED_IMAGE_PROCESSING_FILE_TYPES + ) + if ( + document_processor.get("use_advanced_image_processing") + and unsupported_advanced_image_processing_file_type + ): + raise Exception( + f"Advanced image processing has been enabled for document type {document_type}, but only {ADVANCED_IMAGE_PROCESSING_FILE_TYPES} file 
types are supported." + ) + @staticmethod def get_default_config(): if ConfigHelper._default_config is None: diff --git a/code/backend/batch/utilities/helpers/embedders/embedder_factory.py b/code/backend/batch/utilities/helpers/embedders/embedder_factory.py index 354c698f6..3a2336b99 100644 --- a/code/backend/batch/utilities/helpers/embedders/embedder_factory.py +++ b/code/backend/batch/utilities/helpers/embedders/embedder_factory.py @@ -12,4 +12,4 @@ def create(env_helper: EnvHelper): if env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: return IntegratedVectorizationEmbedder(env_helper) else: - return PushEmbedder(AzureBlobStorageClient()) + return PushEmbedder(AzureBlobStorageClient(), env_helper) diff --git a/code/backend/batch/utilities/helpers/embedders/push_embedder.py b/code/backend/batch/utilities/helpers/embedders/push_embedder.py index 9f793e150..e6001d7ce 100644 --- a/code/backend/batch/utilities/helpers/embedders/push_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/push_embedder.py @@ -3,6 +3,8 @@ from typing import List from ...helpers.llm_helper import LLMHelper +from ...helpers.env_helper import EnvHelper +from ..azure_computer_vision_client import AzureComputerVisionClient from ..azure_blob_storage_client import AzureBlobStorageClient @@ -19,30 +21,48 @@ class PushEmbedder(EmbedderBase): - def __init__(self, blob_client: AzureBlobStorageClient): + def __init__(self, blob_client: AzureBlobStorageClient, env_helper: EnvHelper): self.llm_helper = LLMHelper() self.azure_search_helper = AzureSearchHelper() + self.azure_computer_vision_client = AzureComputerVisionClient(env_helper) self.document_loading = DocumentLoading() self.document_chunking = DocumentChunking() self.blob_client = blob_client - config = ConfigHelper.get_active_config_or_default() + self.config = ConfigHelper.get_active_config_or_default() self.embedding_configs = {} - for processor in config.document_processors: + for processor in self.config.document_processors: 
ext = processor.document_type.lower() self.embedding_configs[ext] = processor def embed_file(self, source_url: str, file_name: str): file_extension = file_name.split(".")[-1] embedding_config = self.embedding_configs.get(file_extension) - self.__embed(source_url=source_url, embedding_config=embedding_config) + self.__embed( + source_url=source_url, + file_extension=file_extension, + embedding_config=embedding_config, + ) if file_extension != "url": self.blob_client.upsert_blob_metadata( file_name, {"embeddings_added": "true"} ) - def __embed(self, source_url: str, embedding_config: EmbeddingConfig): + def __embed( + self, source_url: str, file_extension: str, embedding_config: EmbeddingConfig + ): documents_to_upload: List[SourceDocument] = [] - if not embedding_config.use_advanced_image_processing: + if ( + embedding_config.use_advanced_image_processing + and file_extension + in self.config.get_advanced_image_processing_image_types() + ): + logger.warning("Advanced image processing is not supported yet") + image_vectors = self.azure_computer_vision_client.vectorize_image( + source_url + ) + logger.info("Image vectors: " + str(image_vectors)) + # Coming soon, storing the image embeddings in Azure Search + else: documents: List[SourceDocument] = self.document_loading.load( source_url, embedding_config.loading ) @@ -59,9 +79,6 @@ def __embed(self, source_url: str, embedding_config: EmbeddingConfig): if not all([r.succeeded for r in response]): raise Exception(response) - else: - logger.warning("Advanced image processing is not supported yet") - def _convert_to_search_document(self, document: SourceDocument): embedded_content = self.llm_helper.generate_embeddings(document.content) metadata = { diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py index 6f4634869..138ecd890 100644 --- a/code/backend/batch/utilities/helpers/env_helper.py +++ b/code/backend/batch/utilities/helpers/env_helper.py @@ -111,6 
+111,18 @@ def __load_config(self, **kwargs) -> None: self.USE_ADVANCED_IMAGE_PROCESSING = self.get_env_var_bool( "USE_ADVANCED_IMAGE_PROCESSING", "False" ) + self.AZURE_COMPUTER_VISION_ENDPOINT = os.getenv( + "AZURE_COMPUTER_VISION_ENDPOINT" + ) + self.AZURE_COMPUTER_VISION_TIMEOUT = self.get_env_var_float( + "AZURE_COMPUTER_VISION_TIMEOUT", 30 + ) + self.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION = os.getenv( + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION", "2024-02-01" + ) + self.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION = os.getenv( + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION", "2023-04-15" + ) # Initialize Azure keys based on authentication type and environment settings. # When AZURE_AUTH_TYPE is "rbac", azure keys are None or an empty string. @@ -118,6 +130,7 @@ def __load_config(self, **kwargs) -> None: self.AZURE_SEARCH_KEY = None self.AZURE_OPENAI_API_KEY = "" self.AZURE_SPEECH_KEY = None + self.AZURE_COMPUTER_VISION_KEY = None else: self.AZURE_SEARCH_KEY = self.secretHelper.get_secret("AZURE_SEARCH_KEY") self.AZURE_OPENAI_API_KEY = self.secretHelper.get_secret( @@ -126,6 +139,9 @@ def __load_config(self, **kwargs) -> None: self.AZURE_SPEECH_KEY = self.secretHelper.get_secret( "AZURE_SPEECH_SERVICE_KEY" ) + self.AZURE_COMPUTER_VISION_KEY = self.secretHelper.get_secret( + "AZURE_COMPUTER_VISION_KEY" + ) # Set env for Azure OpenAI self.AZURE_OPENAI_ENDPOINT = os.environ.get( @@ -221,6 +237,9 @@ def get_env_var_bool(self, var_name: str, default: str = "True") -> bool: def get_env_var_array(self, var_name: str, default: str = ""): return os.getenv(var_name, default).split(",") + def get_env_var_float(self, var_name: str, default: int): + return float(os.getenv(var_name, default)) + def is_auth_type_keys(self): return self.AZURE_AUTH_TYPE == "keys" diff --git a/code/tests/conftest.py b/code/tests/conftest.py index e69de29bb..dff73fb36 100644 --- a/code/tests/conftest.py +++ b/code/tests/conftest.py @@ -0,0 +1,35 @@ +import ssl 
+ +import pytest +import trustme + + +@pytest.fixture(scope="session") +def ca(): + """ + This fixture is required to run the http mock server with SSL. + https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server + """ + return trustme.CA() + + +@pytest.fixture(scope="session") +def httpserver_ssl_context(ca): + """ + This fixture is required to run the http mock server with SSL. + https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server + """ + context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + localhost_cert = ca.issue_cert("localhost") + localhost_cert.configure_cert(context) + return context + + +@pytest.fixture(scope="session") +def httpclient_ssl_context(ca): + """ + This fixture is required to run the http mock server with SSL. + https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server + """ + with ca.cert_pem.tempfile() as ca_temp_path: + return ssl.create_default_context(cafile=ca_temp_path) diff --git a/code/tests/constants.py b/code/tests/constants.py new file mode 100644 index 000000000..d29977139 --- /dev/null +++ b/code/tests/constants.py @@ -0,0 +1,5 @@ +AZURE_STORAGE_CONFIG_CONTAINER_NAME = "config" +AZURE_STORAGE_CONFIG_FILE_NAME = "active.json" + +COMPUTER_VISION_VECTORIZE_IMAGE_PATH = "/computervision/retrieval:vectorizeImage" +COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD = "POST" diff --git a/code/tests/functional/app_config.py b/code/tests/functional/app_config.py index aaef55d84..ae1569027 100644 --- a/code/tests/functional/app_config.py +++ b/code/tests/functional/app_config.py @@ -15,6 +15,7 @@ class AppConfig: ), "AZURE_BLOB_ACCOUNT_NAME": "some-blob-account-name", "AZURE_BLOB_CONTAINER_NAME": "some-blob-container-name", + "AZURE_COMPUTER_VISION_KEY": "some-computer-vision-key", "AZURE_CONTENT_SAFETY_ENDPOINT": "some-content-safety-endpoint", "AZURE_CONTENT_SAFETY_KEY": "some-content-safety-key", "AZURE_FORM_RECOGNIZER_ENDPOINT": 
"some-form-recognizer-endpoint", diff --git a/code/tests/functional/conftest.py b/code/tests/functional/conftest.py index 173d16dc4..970a7372d 100644 --- a/code/tests/functional/conftest.py +++ b/code/tests/functional/conftest.py @@ -1,54 +1,23 @@ -import ssl import pytest from pytest_httpserver import HTTPServer from tests.functional.app_config import AppConfig -from backend.batch.utilities.helpers.config.config_helper import ( - CONFIG_CONTAINER_NAME, - CONFIG_FILE_NAME, +from tests.constants import ( + AZURE_STORAGE_CONFIG_CONTAINER_NAME, + AZURE_STORAGE_CONFIG_FILE_NAME, + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, ) -import trustme - - -@pytest.fixture(scope="session") -def ca(): - """ - This fixture is required to run the http mock server with SSL. - https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server - """ - return trustme.CA() - - -@pytest.fixture(scope="session") -def httpserver_ssl_context(ca): - """ - This fixture is required to run the http mock server with SSL. - https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server - """ - context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) - localhost_cert = ca.issue_cert("localhost") - localhost_cert.configure_cert(context) - return context - - -@pytest.fixture(scope="session") -def httpclient_ssl_context(ca): - """ - This fixture is required to run the http mock server with SSL. 
- https://pytest-httpserver.readthedocs.io/en/latest/howto.html#running-an-https-server - """ - with ca.cert_pem.tempfile() as ca_temp_path: - return ssl.create_default_context(cafile=ca_temp_path) @pytest.fixture(scope="function", autouse=True) def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): httpserver.expect_request( - f"/{CONFIG_CONTAINER_NAME}/{CONFIG_FILE_NAME}", + f"/{AZURE_STORAGE_CONFIG_CONTAINER_NAME}/{AZURE_STORAGE_CONFIG_FILE_NAME}", method="HEAD", ).respond_with_data() httpserver.expect_request( - f"/{CONFIG_CONTAINER_NAME}/{CONFIG_FILE_NAME}", + f"/{AZURE_STORAGE_CONFIG_CONTAINER_NAME}/{AZURE_STORAGE_CONFIG_FILE_NAME}", method="GET", ).respond_with_json( { @@ -233,6 +202,11 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): method="POST", ).respond_with_data("speech-token") + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json({"modelVersion": "2022-04-11", "vector": [1.0, 2.0, 3.0]}) + yield httpserver.check() diff --git a/code/tests/functional/tests/backend_api/default/test_azure_byod.py b/code/tests/functional/tests/backend_api/default/test_azure_byod.py index 4e9738b21..8d1e22226 100644 --- a/code/tests/functional/tests/backend_api/default/test_azure_byod.py +++ b/code/tests/functional/tests/backend_api/default/test_azure_byod.py @@ -4,7 +4,7 @@ import requests from string import Template -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/default/test_conversation_custom.py b/code/tests/functional/tests/backend_api/default/test_conversation_custom.py index e9283d0ed..ad0bfd80f 100644 --- a/code/tests/functional/tests/backend_api/default/test_conversation_custom.py +++ b/code/tests/functional/tests/backend_api/default/test_conversation_custom.py @@ -4,7 +4,7 @@ from pytest_httpserver 
import HTTPServer import requests -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/default/test_speech_token.py b/code/tests/functional/tests/backend_api/default/test_speech_token.py index 1e61e0c75..1388b0cd5 100644 --- a/code/tests/functional/tests/backend_api/default/test_speech_token.py +++ b/code/tests/functional/tests/backend_api/default/test_speech_token.py @@ -2,7 +2,7 @@ import requests from pytest_httpserver import HTTPServer from tests.functional.app_config import AppConfig -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py index 783c8006a..ec99d5203 100644 --- a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py +++ b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py @@ -2,7 +2,7 @@ from pytest_httpserver import HTTPServer import requests -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_text_processing_tool.py b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_text_processing_tool.py index 010b41758..d98cbcde4 100644 --- a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_text_processing_tool.py +++ b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_text_processing_tool.py @@ -2,7 +2,7 @@ from pytest_httpserver import HTTPServer import requests -from tests.functional.request_matching import ( +from 
tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py index 5b19ab704..ce9002a06 100644 --- a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py +++ b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py @@ -4,7 +4,7 @@ from pytest_httpserver import HTTPServer import requests -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py b/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py index d8787d69a..24dd49033 100644 --- a/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py +++ b/code/tests/functional/tests/backend_api/without_data/test_azure_byod_without_data.py @@ -4,7 +4,7 @@ import requests from string import Template -from tests.functional.request_matching import ( +from tests.request_matching import ( RequestMatcher, verify_request_made, ) diff --git a/code/tests/functional/tests/functions/conftest.py b/code/tests/functional/tests/functions/conftest.py index e03864398..d4102207f 100644 --- a/code/tests/functional/tests/functions/conftest.py +++ b/code/tests/functional/tests/functions/conftest.py @@ -18,6 +18,7 @@ def app_config(make_httpserver, ca): "AZURE_CONTENT_SAFETY_ENDPOINT": f"https://localhost:{make_httpserver.port}/", "AZURE_SPEECH_REGION_ENDPOINT": f"https://localhost:{make_httpserver.port}/", "AZURE_STORAGE_ACCOUNT_ENDPOINT": f"https://localhost:{make_httpserver.port}/", + "AZURE_COMPUTER_VISION_ENDPOINT": f"https://localhost:{make_httpserver.port}/", "SSL_CERT_FILE": ca_temp_path, "CURL_CA_BUNDLE": ca_temp_path, } diff --git 
a/code/tests/functional/tests/functions/test_advanced_image_processing.py b/code/tests/functional/tests/functions/test_advanced_image_processing.py index 42550f2a4..89867ce40 100644 --- a/code/tests/functional/tests/functions/test_advanced_image_processing.py +++ b/code/tests/functional/tests/functions/test_advanced_image_processing.py @@ -5,13 +5,15 @@ from azure.functions import QueueMessage import pytest -from backend.batch.utilities.helpers.config.config_helper import ( - CONFIG_CONTAINER_NAME, - CONFIG_FILE_NAME, -) from pytest_httpserver import HTTPServer from tests.functional.app_config import AppConfig -from tests.functional.request_matching import RequestMatcher, verify_request_made +from tests.request_matching import RequestMatcher, verify_request_made +from tests.constants import ( + AZURE_STORAGE_CONFIG_FILE_NAME, + AZURE_STORAGE_CONFIG_CONTAINER_NAME, + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, +) sys.path.append( os.path.join(os.path.dirname(sys.path[0]), "..", "..", "backend", "batch") @@ -78,7 +80,7 @@ def test_config_file_is_retrieved_from_storage( verify_request_made( mock_httpserver=httpserver, request_matcher=RequestMatcher( - path=f"/{CONFIG_CONTAINER_NAME}/{CONFIG_FILE_NAME}", + path=f"/{AZURE_STORAGE_CONFIG_CONTAINER_NAME}/{AZURE_STORAGE_CONFIG_FILE_NAME}", method="GET", headers={ "Authorization": ANY, @@ -88,6 +90,34 @@ def test_config_file_is_retrieved_from_storage( ) +def test_image_passed_to_computer_vision_to_generate_image_embeddings( + message: QueueMessage, httpserver: HTTPServer, app_config: AppConfig +): + # when + batch_push_results.build().get_user_function()(message) + + # then + request = verify_request_made( + httpserver, + RequestMatcher( + path=COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + method=COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + query_string="api-version=2024-02-01&model-version=2023-04-15", + headers={ + "Content-Type": "application/json", + 
"Ocp-Apim-Subscription-Key": app_config.get( + "AZURE_COMPUTER_VISION_KEY" + ), + }, + times=1, + ), + )[0] + + assert request.get_json()["url"].startswith( + f"{app_config.get('AZURE_COMPUTER_VISION_ENDPOINT')}{app_config.get('AZURE_BLOB_CONTAINER_NAME')}/{FILE_NAME}" + ) + + def test_metadata_is_updated_after_processing( message: QueueMessage, httpserver: HTTPServer, app_config: AppConfig ): diff --git a/code/tests/functional/request_matching.py b/code/tests/request_matching.py similarity index 94% rename from code/tests/functional/request_matching.py rename to code/tests/request_matching.py index e637e2092..2cb91fc98 100644 --- a/code/tests/functional/request_matching.py +++ b/code/tests/request_matching.py @@ -30,7 +30,9 @@ def __str__(self): return f"Path: {self.path}, Method: {self.method}, JSON: {self.json}, Headers: {self.headers}, Query String: {self.query_string}, Times: {self.times}" -def verify_request_made(mock_httpserver: HTTPServer, request_matcher: RequestMatcher): +def verify_request_made( + mock_httpserver: HTTPServer, request_matcher: RequestMatcher +) -> list[Request]: requests_log = mock_httpserver.log similar_requests = [] @@ -71,6 +73,7 @@ def verify_request_made(mock_httpserver: HTTPServer, request_matcher: RequestMat error_message += "\n--- Similar Request End" assert len(matching_requests) == request_matcher.times, error_message + return matching_requests def contains_all_headers(request_matcher: RequestMatcher, request: Request): diff --git a/code/tests/utilities/helpers/test_AzureComputerVisionClient.py b/code/tests/utilities/helpers/test_AzureComputerVisionClient.py new file mode 100644 index 000000000..40f9d530d --- /dev/null +++ b/code/tests/utilities/helpers/test_AzureComputerVisionClient.py @@ -0,0 +1,244 @@ +import json +from json import JSONDecodeError +from unittest import mock +from unittest.mock import MagicMock +import pytest +from pytest_httpserver import HTTPServer +from trustme import CA +import werkzeug +import time +from 
requests import ReadTimeout + +from backend.batch.utilities.helpers.azure_computer_vision_client import ( + AzureComputerVisionClient, +) +from tests.request_matching import RequestMatcher, verify_request_made +from tests.constants import ( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, +) + + +# These tests utilize `pytest_httpserver` to mock the Azure Computer Vision API. This is instead of mocking the requests +# library directly, like other client classes. The reasons for doing this are: +# 1. This gives us complete confidence that the requests library works as we expect it to, for example parsing of bad +# json. +# 2. It allows us to test the actual HTTP request that is being made to the Azure Computer Vision API. +# 3. If we need to change which http library we are using, there should be minimal changes required to the tests. +# +# If and when the Azure Computer Vision Python SDK starts to support the `vectorizeImage` and `vectorizeText` endpoints, +# and we switch to it, we should consider switching back to conventional test mocking. 
+ +IMAGE_URL = "some-image-url.jpg" +AZURE_COMPUTER_VISION_KEY = "some-api-key" + + +@pytest.fixture(autouse=True) +def pytest_ssl(monkeypatch: pytest.MonkeyPatch, ca: CA): + with ca.cert_pem.tempfile() as ca_temp_path: + monkeypatch.setenv("SSL_CERT_FILE", ca_temp_path) + monkeypatch.setenv("CURL_CA_BUNDLE", ca_temp_path) + yield + + +@pytest.fixture +def env_helper_mock(httpserver: HTTPServer): + env_helper_mock = MagicMock() + env_helper_mock.AZURE_COMPUTER_VISION_ENDPOINT = httpserver.url_for("") + env_helper_mock.AZURE_COMPUTER_VISION_KEY = AZURE_COMPUTER_VISION_KEY + env_helper_mock.AZURE_COMPUTER_VISION_TIMEOUT = 0.25 + env_helper_mock.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION = "2024-02-01" + env_helper_mock.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION = "2023-04-15" + env_helper_mock.is_auth_type_keys.return_value = True + return env_helper_mock + + +@pytest.fixture +def azure_computer_vision_client(env_helper_mock: MagicMock): + return AzureComputerVisionClient(env_helper_mock) + + +@pytest.fixture +def azure_computer_vision_client_rbac(env_helper_mock: MagicMock): + env_helper_mock.is_auth_type_keys.return_value = False + return AzureComputerVisionClient(env_helper_mock) + + +def test_vectorize_image_calls_computer_vision_with_key_based_authentication( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json({"modelVersion": "2022-04-11", "vector": [1.0, 2.0, 3.0]}) + + # when + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + verify_request_made( + httpserver, + RequestMatcher( + path=COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + method=COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + query_string="api-version=2024-02-01&model-version=2023-04-15", + headers={ + "Content-Type": "application/json", + "Ocp-Apim-Subscription-Key": 
AZURE_COMPUTER_VISION_KEY, + }, + json={"url": IMAGE_URL}, + ), + ) + + +@mock.patch( + "backend.batch.utilities.helpers.azure_computer_vision_client.DefaultAzureCredential" +) +@mock.patch( + "backend.batch.utilities.helpers.azure_computer_vision_client.get_bearer_token_provider" +) +def test_vectorize_image_calls_computer_vision_with_rbac_based_authentication( + mock_get_bearer_token_provider: MagicMock, + mock_default_azure_credential: MagicMock, + httpserver: HTTPServer, + azure_computer_vision_client_rbac: AzureComputerVisionClient, +): + # given + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json({"modelVersion": "2022-04-11", "vector": [1.0, 2.0, 3.0]}) + + # when + mock_get_bearer_token_provider.return_value.return_value = "dummy token" + + azure_computer_vision_client_rbac.vectorize_image(IMAGE_URL) + + # then + mock_default_azure_credential.assert_called_once() + mock_get_bearer_token_provider.assert_called_once_with( + mock_default_azure_credential.return_value, + "https://cognitiveservices.azure.com/.default", + ) + + verify_request_made( + httpserver, + RequestMatcher( + path=COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + method=COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + query_string="api-version=2024-02-01&model-version=2023-04-15", + headers={ + "Content-Type": "application/json", + "Authorization": "Bearer dummy token", + }, + json={"url": IMAGE_URL}, + ), + ) + + +def test_returns_image_vectors( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + expected_vectors = [1.0, 2.0, 3.0] + + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json({"modelVersion": "2022-04-11", "vector": expected_vectors}) + + # when + actual_vectors = azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + assert actual_vectors == 
expected_vectors + + +def test_vectorize_image_calls_computer_vision_timeout( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + def handler(_) -> werkzeug.Response: + time.sleep(0.3) + return werkzeug.Response( + json.dumps({"modelVersion": "2022-04-11", "vector": [1.0, 2.0, 3.0]}), + status=200, + ) + + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_handler(handler) + + # when + with pytest.raises(Exception) as exec_info: + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + assert exec_info.value.args[0] == "Call to vectorize image failed: " + IMAGE_URL + assert isinstance(exec_info.value.__cause__, ReadTimeout) + + +def test_raises_exception_if_bad_response_code( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + response_body = {"error": "computer says no"} + response_status = 500 + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json(response_body, status=response_status) + + # when + with pytest.raises(Exception) as exec_info: + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + assert ( + exec_info.value.args[0] + == f"Call to vectorize image failed with status: {response_status} body: {json.dumps(response_body, indent=4)}" + ) + + +def test_raises_exception_if_non_json_response( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + response_body = "not json" + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_data(response_body, status=200) + + # when + with pytest.raises(Exception) as exec_info: + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + assert ( + exec_info.value.args[0] + == f"Call to vectorize image returned 
malformed response body: {response_body}" + ) + assert isinstance(exec_info.value.__cause__, JSONDecodeError) + + +def test_raises_exception_if_vector_not_in_response( + httpserver: HTTPServer, azure_computer_vision_client: AzureComputerVisionClient +): + # given + response_body = {"modelVersion": "2022-04-11"} + httpserver.expect_request( + COMPUTER_VISION_VECTORIZE_IMAGE_PATH, + COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, + ).respond_with_json(response_body, status=200) + + # when + with pytest.raises(Exception) as exec_info: + azure_computer_vision_client.vectorize_image(IMAGE_URL) + + # then + assert ( + exec_info.value.args[0] + == f"Call to vectorize image returned no vector: {response_body}" + ) diff --git a/code/tests/utilities/helpers/test_config_helper.py b/code/tests/utilities/helpers/test_config_helper.py index 868697af1..7be1a05d6 100644 --- a/code/tests/utilities/helpers/test_config_helper.py +++ b/code/tests/utilities/helpers/test_config_helper.py @@ -125,7 +125,9 @@ def blob_client_mock(config_dict: dict, AzureBlobStorageClientMock: MagicMock): @pytest.fixture(autouse=True) def env_helper_mock(): - with patch("backend.batch.utilities.helpers.config.config_helper.EnvHelper") as mock: + with patch( + "backend.batch.utilities.helpers.config.config_helper.EnvHelper" + ) as mock: env_helper = mock.return_value env_helper.ORCHESTRATION_STRATEGY = "openai_function" env_helper.LOAD_CONFIG_FROM_BLOB_STORAGE = True @@ -263,6 +265,37 @@ def test_save_config_as_active( ) +def test_save_config_as_active_validates_advanced_image_file_types_are_valid( + AzureBlobStorageClientMock: MagicMock, + config_dict: dict, +): + # given + config_dict["document_processors"] = [ + { + "document_type": "txt", + "chunking": { + "strategy": "layout", + "size": 500, + "overlap": 100, + }, + "loading": { + "strategy": "web", + }, + "use_advanced_image_processing": True, + } + ] + + # when + with pytest.raises(Exception) as e: + ConfigHelper.save_config_as_active(config_dict) + 
+ # then + assert str(e.value) == ( + "Advanced image processing has been enabled for document type txt, but only ['jpeg', 'jpg', 'png', 'tiff', 'bmp'] file types are supported." + ) + AzureBlobStorageClientMock.assert_not_called() + + def test_delete_config(AzureBlobStorageClientMock: MagicMock): # when ConfigHelper.delete_config() @@ -362,6 +395,14 @@ def test_get_available_document_types_when_advanced_image_processing_enabled( ) +def test_get_advanced_image_processing_image_types(config: Config): + # when + image_types = config.get_advanced_image_processing_image_types() + + # then + assert sorted(image_types) == sorted(["jpeg", "jpg", "png", "tiff", "bmp"]) + + def test_get_available_chunking_strategies(config: Config): # when chunking_strategies = config.get_available_chunking_strategies() diff --git a/code/tests/utilities/helpers/test_env_helper.py b/code/tests/utilities/helpers/test_env_helper.py index 373d46108..aff7090bc 100644 --- a/code/tests/utilities/helpers/test_env_helper.py +++ b/code/tests/utilities/helpers/test_env_helper.py @@ -83,6 +83,7 @@ def test_keys_are_unset_when_auth_type_rbac(monkeypatch: MonkeyPatch): assert env_helper.AZURE_SEARCH_KEY is None assert env_helper.AZURE_OPENAI_API_KEY == "" assert env_helper.AZURE_SPEECH_KEY is None + assert env_helper.AZURE_COMPUTER_VISION_KEY is None def test_sets_default_log_level_when_unset(): diff --git a/code/tests/utilities/helpers/test_push_embedder.py b/code/tests/utilities/helpers/test_push_embedder.py index 015382902..df2fc034c 100644 --- a/code/tests/utilities/helpers/test_push_embedder.py +++ b/code/tests/utilities/helpers/test_push_embedder.py @@ -53,6 +53,11 @@ def mock_config_helper(): use_advanced_image_processing=False, ), ] + config_helper.get_advanced_image_processing_image_types.return_value = { + "jpeg", + "jpg", + "png", + } yield config_helper @@ -99,11 +104,19 @@ def document_chunking_mock(): yield mock -def test_embed_file_use_advanced_image_processing_skips_processing( 
+@pytest.fixture(autouse=True) +def azure_computer_vision_mock(): + with patch( + "backend.batch.utilities.helpers.embedders.push_embedder.AzureComputerVisionClient" + ) as mock: + yield mock + + +def test_embed_file_advanced_image_processing_skips_document_processing( azure_search_helper_mock, ): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) # when push_embedder.embed_file("some-url", "some-file-name.jpg") @@ -112,9 +125,49 @@ def test_embed_file_use_advanced_image_processing_skips_processing( azure_search_helper_mock.return_value.get_search_client.assert_not_called() +def test_embed_file_advanced_image_processing_vectorizes_image( + azure_computer_vision_mock, +): + # given + push_embedder = PushEmbedder(MagicMock(), MagicMock()) + source_url = "http://localhost:8080/some-file-name.jpg" + + # when + push_embedder.embed_file(source_url, "some-file-name.jpg") + + # then + azure_computer_vision_mock.return_value.vectorize_image.assert_called_once_with( + source_url + ) + + +def test_embed_file_use_advanced_image_processing_does_not_vectorize_image_if_unsupported( + azure_computer_vision_mock, mock_config_helper, azure_search_helper_mock +): + # given + mock_config_helper.document_processors = [ + EmbeddingConfig( + "txt", + CHUNKING_SETTINGS, + LOADING_SETTINGS, + use_advanced_image_processing=True, + ), + ] + + push_embedder = PushEmbedder(MagicMock(), MagicMock()) + source_url = "http://localhost:8080/some-file-name.txt" + + # when + push_embedder.embed_file(source_url, "some-file-name.txt") + + # then + azure_computer_vision_mock.return_value.vectorize_image.assert_not_called() + azure_search_helper_mock.return_value.get_search_client.assert_called_once() + + def test_embed_file_loads_documents(document_loading_mock): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) source_url = "some-url" # when @@ -131,7 +184,7 @@ def 
test_embed_file_loads_documents(document_loading_mock): def test_embed_file_chunks_documents(document_loading_mock, document_chunking_mock): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) # when push_embedder.embed_file( @@ -147,7 +200,7 @@ def test_embed_file_chunks_documents(document_loading_mock, document_chunking_mo def test_embed_file_generates_embeddings_for_documents(llm_helper_mock): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) # when push_embedder.embed_file( @@ -167,7 +220,7 @@ def test_embed_file_stores_documents_in_search_index( azure_search_helper_mock, ): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) # when push_embedder.embed_file( @@ -227,7 +280,7 @@ def test_embed_file_raises_exception_on_failure( azure_search_helper_mock, ): # given - push_embedder = PushEmbedder(MagicMock()) + push_embedder = PushEmbedder(MagicMock(), MagicMock()) successful_indexing_result = MagicMock() successful_indexing_result.succeeded = True diff --git a/infra/app/function.bicep b/infra/app/function.bicep index 1460fb8b9..399e54a79 100644 --- a/infra/app/function.bicep +++ b/infra/app/function.bicep @@ -16,11 +16,13 @@ param azureAISearchName string = '' param formRecognizerName string = '' param contentSafetyName string = '' param speechServiceName string = '' +param computerVisionName string = '' param useKeyVault bool param openAIKeyName string = '' param storageAccountKeyName string = '' param formRecognizerKeyName string = '' param searchKeyName string = '' +param computerVisionKeyName string = '' param contentSafetyKeyName string = '' param speechKeyName string = '' param authType string @@ -49,6 +51,7 @@ module function '../core/host/functions.bicep' = { AZURE_FORM_RECOGNIZER_KEY: useKeyVault ? 
formRecognizerKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', formRecognizerName), '2023-05-01').key1 AZURE_CONTENT_SAFETY_KEY: useKeyVault ? contentSafetyKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', contentSafetyName), '2023-05-01').key1 AZURE_SPEECH_SERVICE_KEY: useKeyVault ? speechKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', speechServiceName), '2023-05-01').key1 + AZURE_COMPUTER_VISION_KEY: useKeyVault ? computerVisionKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 }) } } diff --git a/infra/app/storekeys.bicep b/infra/app/storekeys.bicep index 5075b9d4b..d4498025b 100644 --- a/infra/app/storekeys.bicep +++ b/infra/app/storekeys.bicep @@ -6,12 +6,14 @@ param rgName string = '' param formRecognizerName string = '' param contentSafetyName string = '' param speechServiceName string = '' +param computerVisionName string = '' param storageAccountKeyName string = 'AZURE-STORAGE-ACCOUNT-KEY' param openAIKeyName string = 'AZURE-OPENAI-API-KEY' param searchKeyName string = 'AZURE-SEARCH-KEY' param formRecognizerKeyName string = 'AZURE-FORM-RECOGNIZER-KEY' param contentSafetyKeyName string = 'AZURE-CONTENT-SAFETY-KEY' param speechKeyName string = 'AZURE-SPEECH-KEY' +param computerVisionKeyName string = 'AZURE-COMPUTER-VISION-KEY' resource storageAccountKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { parent: keyVault @@ -61,6 +63,14 @@ resource speechKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { } } +resource computerVisionKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { + parent: keyVault + name: computerVisionKeyName + properties: { + value: listKeys(resourceId(subscription().subscriptionId, 
rgName, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 + } +} + resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = { name: keyVaultName } @@ -71,3 +81,4 @@ output SEARCH_KEY_NAME string = searchKeySecret.name output OPENAI_KEY_NAME string = openAIKeySecret.name output STORAGE_ACCOUNT_KEY_NAME string = storageAccountKeySecret.name output SPEECH_KEY_NAME string = speechKeySecret.name +output COMPUTER_VISION_KEY_NAME string = computerVisionKeySecret.name diff --git a/infra/main.bicep b/infra/main.bicep index f19c80f28..fca9bb6b7 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -180,6 +180,12 @@ param computerVisionSkuName string = 'S1' ]) param computerVisionLocation string = useAdvancedImageProcessing ? location : '' +@description('Azure Computer Vision Vectorize Image API Version') +param computerVisionVectorizeImageApiVersion string = '2024-02-01' + +@description('Azure Computer Vision Vectorize Image Model Version') +param computerVisionVectorizeImageModelVersion string ='2023-04-15' + @description('Azure AI Search Resource') param azureAISearchName string = 'search-${resourceToken}' @@ -424,6 +430,7 @@ module storekeys './app/storekeys.bicep' = if (useKeyVault) { formRecognizerName: formrecognizer.outputs.name contentSafetyName: contentsafety.outputs.name speechServiceName: speechServiceName + computerVisionName: computerVision.outputs.name rgName: rgName } } @@ -651,6 +658,7 @@ module adminweb './app/adminweb.bicep' = if (hostingModel == 'code') { AZURE_SEARCH_DATASOURCE_NAME: azureSearchDatasource AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization + USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing BACKEND_URL: 'https://${functionName}.azurewebsites.net' DOCUMENT_PROCESSING_QUEUE_NAME: queueName FUNCTION_KEY: clientKey @@ -716,6 +724,7 @@ module adminweb_docker './app/adminweb.bicep' = if (hostingModel == 'container') 
AZURE_SEARCH_DATASOURCE_NAME: azureSearchDatasource AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization + USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing BACKEND_URL: 'https://${functionName}-docker.azurewebsites.net' DOCUMENT_PROCESSING_QUEUE_NAME: queueName FUNCTION_KEY: clientKey @@ -774,6 +783,7 @@ module function './app/function.bicep' = if (hostingModel == 'code') { formRecognizerName: formrecognizer.outputs.name contentSafetyName: contentsafety.outputs.name speechServiceName: speechService.outputs.name + computerVisionName: computerVision.outputs.name clientKey: clientKey openAIKeyName: useKeyVault ? storekeys.outputs.OPENAI_KEY_NAME : '' storageAccountKeyName: useKeyVault ? storekeys.outputs.STORAGE_ACCOUNT_KEY_NAME : '' @@ -781,12 +791,15 @@ module function './app/function.bicep' = if (hostingModel == 'code') { searchKeyName: useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : '' contentSafetyKeyName: useKeyVault ? storekeys.outputs.CONTENT_SAFETY_KEY_NAME : '' speechKeyName: useKeyVault ? storekeys.outputs.SPEECH_KEY_NAME : '' + computerVisionKeyName: useKeyVault ? storekeys.outputs.COMPUTER_VISION_KEY_NAME : '' useKeyVault: useKeyVault keyVaultName: useKeyVault || authType == 'rbac' ? 
keyvault.outputs.name : '' authType: authType appSettings: { AZURE_BLOB_ACCOUNT_NAME: storageAccountName AZURE_BLOB_CONTAINER_NAME: blobContainerName + AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION: computerVisionVectorizeImageApiVersion + AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION: computerVisionVectorizeImageModelVersion AZURE_CONTENT_SAFETY_ENDPOINT: contentsafety.outputs.endpoint AZURE_FORM_RECOGNIZER_ENDPOINT: formrecognizer.outputs.endpoint AZURE_OPENAI_MODEL: azureOpenAIModel @@ -798,6 +811,8 @@ module function './app/function.bicep' = if (hostingModel == 'code') { AZURE_SEARCH_DATASOURCE_NAME: azureSearchDatasource AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization + USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing + AZURE_COMPUTER_VISION_ENDPOINT: computerVision.outputs.endpoint DOCUMENT_PROCESSING_QUEUE_NAME: queueName ORCHESTRATION_STRATEGY: orchestrationStrategy LOGLEVEL: logLevel @@ -828,12 +843,15 @@ module function_docker './app/function.bicep' = if (hostingModel == 'container') searchKeyName: useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : '' contentSafetyKeyName: useKeyVault ? storekeys.outputs.CONTENT_SAFETY_KEY_NAME : '' speechKeyName: useKeyVault ? storekeys.outputs.SPEECH_KEY_NAME : '' + computerVisionName: useKeyVault ? storekeys.outputs.COMPUTER_VISION_KEY_NAME : '' useKeyVault: useKeyVault keyVaultName: useKeyVault || authType == 'rbac' ? 
keyvault.outputs.name : '' authType: authType appSettings: { AZURE_BLOB_ACCOUNT_NAME: storageAccountName AZURE_BLOB_CONTAINER_NAME: blobContainerName + AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION: computerVisionVectorizeImageApiVersion + AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION: computerVisionVectorizeImageModelVersion AZURE_CONTENT_SAFETY_ENDPOINT: contentsafety.outputs.endpoint AZURE_FORM_RECOGNIZER_ENDPOINT: formrecognizer.outputs.endpoint AZURE_OPENAI_MODEL: azureOpenAIModel @@ -845,6 +863,8 @@ module function_docker './app/function.bicep' = if (hostingModel == 'container') AZURE_SEARCH_DATASOURCE_NAME: azureSearchDatasource AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization + USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing + AZURE_COMPUTER_VISION_ENDPOINT: computerVision.outputs.endpoint DOCUMENT_PROCESSING_QUEUE_NAME: queueName ORCHESTRATION_STRATEGY: orchestrationStrategy LOGLEVEL: logLevel @@ -970,6 +990,8 @@ output AZURE_APP_SERVICE_HOSTING_MODEL string = hostingModel output AZURE_BLOB_CONTAINER_NAME string = blobContainerName output AZURE_BLOB_ACCOUNT_NAME string = storageAccountName output AZURE_BLOB_ACCOUNT_KEY string = useKeyVault ? storekeys.outputs.STORAGE_ACCOUNT_KEY_NAME : '' +output AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION string = computerVisionVectorizeImageApiVersion +output AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION string = computerVisionVectorizeImageModelVersion output AZURE_CONTENT_SAFETY_ENDPOINT string = contentsafety.outputs.endpoint output AZURE_CONTENT_SAFETY_KEY string = useKeyVault ? 
storekeys.outputs.CONTENT_SAFETY_KEY_NAME : '' output AZURE_FORM_RECOGNIZER_ENDPOINT string = formrecognizer.outputs.endpoint @@ -989,6 +1011,7 @@ output AZURE_OPENAI_RESOURCE string = azureOpenAIResourceName output AZURE_OPENAI_EMBEDDING_MODEL string = azureOpenAIEmbeddingModel output AZURE_OPENAI_MODEL string = azureOpenAIModel output AZURE_OPENAI_API_KEY string = useKeyVault ? storekeys.outputs.OPENAI_KEY_NAME : '' +output AZURE_COMPUTER_VISION_ENDPOINT string = computerVision.outputs.endpoint output AZURE_RESOURCE_GROUP string = rgName output AZURE_SEARCH_KEY string = useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : '' output AZURE_SEARCH_SERVICE string = search.outputs.endpoint diff --git a/infra/main.bicepparam b/infra/main.bicepparam index 948df0d5b..2aaec96f4 100644 --- a/infra/main.bicepparam +++ b/infra/main.bicepparam @@ -35,7 +35,11 @@ param azureOpenAIMaxTokens = readEnvironmentVariable('AZURE_OPENAI_MAX_TOKENS', param azureOpenAITemperature = readEnvironmentVariable('AZURE_OPENAI_TEMPERATURE', '0') param azureOpenAITopP = readEnvironmentVariable('AZURE_OPENAI_TOP_P', '1') param azureOpenAIStopSequence = readEnvironmentVariable('AZURE_OPENAI_STOP_SEQUENCE', '\n') + +// Computer Vision parameters param computerVisionLocation = readEnvironmentVariable('AZURE_COMPUTER_VISION_LOCATION', '') +param computerVisionVectorizeImageApiVersion = readEnvironmentVariable('AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION', '2024-02-01') +param computerVisionVectorizeImageModelVersion = readEnvironmentVariable('AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION', '2023-04-15') // The following are being renamed to align with the new naming convention // we manipulate existing resources here to maintain backwards compatibility diff --git a/infra/main.json b/infra/main.json index cdb55d8da..915088a7c 100644 --- a/infra/main.json +++ b/infra/main.json @@ -5,7 +5,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "17426906878691848997" + 
"templateHash": "95501043309266990" } }, "parameters": { @@ -366,6 +366,20 @@ "description": "Location of Computer Vision Resource (if useAdvancedImageProcessing=true)" } }, + "computerVisionVectorizeImageApiVersion": { + "type": "string", + "defaultValue": "2024-02-01", + "metadata": { + "description": "Azure Computer Vision Vectorize Image API Version" + } + }, + "computerVisionVectorizeImageModelVersion": { + "type": "string", + "defaultValue": "2023-04-15", + "metadata": { + "description": "Azure Computer Vision Vectorize Image Model Version" + } + }, "azureAISearchName": { "type": "string", "defaultValue": "[format('search-{0}', parameters('resourceToken'))]", @@ -1438,6 +1452,9 @@ "speechServiceName": { "value": "[parameters('speechServiceName')]" }, + "computerVisionName": { + "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.name.value]" + }, "rgName": { "value": "[variables('rgName')]" } @@ -1449,7 +1466,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "6700778522040462677" + "templateHash": "3769272141523051550" } }, "parameters": { @@ -1485,6 +1502,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionName": { + "type": "string", + "defaultValue": "" + }, "storageAccountKeyName": { "type": "string", "defaultValue": "AZURE-STORAGE-ACCOUNT-KEY" @@ -1508,6 +1529,10 @@ "speechKeyName": { "type": "string", "defaultValue": "AZURE-SPEECH-KEY" + }, + "computerVisionKeyName": { + "type": "string", + "defaultValue": "AZURE-COMPUTER-VISION-KEY" } }, "resources": [ @@ -1558,6 +1583,14 @@ "properties": { "value": "[listKeys(resourceId(subscription().subscriptionId, parameters('rgName'), 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1]" } + }, + { + "type": "Microsoft.KeyVault/vaults/secrets", + "apiVersion": 
"2022-07-01", + "name": "[format('{0}/{1}', parameters('keyVaultName'), parameters('computerVisionKeyName'))]", + "properties": { + "value": "[listKeys(resourceId(subscription().subscriptionId, parameters('rgName'), 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1]" + } } ], "outputs": { @@ -1584,11 +1617,16 @@ "SPEECH_KEY_NAME": { "type": "string", "value": "[parameters('speechKeyName')]" + }, + "COMPUTER_VISION_KEY_NAME": { + "type": "string", + "value": "[parameters('computerVisionKeyName')]" } } } }, "dependsOn": [ + "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision')]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('azureOpenAIResourceName'))]", @@ -3837,6 +3875,7 @@ "AZURE_SEARCH_DATASOURCE_NAME": "[parameters('azureSearchDatasource')]", "AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", + "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", "BACKEND_URL": "[format('https://{0}.azurewebsites.net', parameters('functionName'))]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "FUNCTION_KEY": "[variables('clientKey')]", @@ -4764,6 +4803,7 @@ "AZURE_SEARCH_DATASOURCE_NAME": "[parameters('azureSearchDatasource')]", 
"AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", + "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", "BACKEND_URL": "[format('https://{0}-docker.azurewebsites.net', parameters('functionName'))]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "FUNCTION_KEY": "[variables('clientKey')]", @@ -7382,6 +7422,9 @@ "speechServiceName": { "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('speechServiceName')), '2022-09-01').outputs.name.value]" }, + "computerVisionName": { + "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.name.value]" + }, "clientKey": { "value": "[variables('clientKey')]" }, @@ -7391,6 +7434,7 @@ "searchKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.SEARCH_KEY_NAME.value), createObject('value', ''))]", "contentSafetyKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.CONTENT_SAFETY_KEY_NAME.value), createObject('value', ''))]", "speechKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 
'storekeys'), '2022-09-01').outputs.SPEECH_KEY_NAME.value), createObject('value', ''))]", + "computerVisionKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.COMPUTER_VISION_KEY_NAME.value), createObject('value', ''))]", "useKeyVault": { "value": "[parameters('useKeyVault')]" }, @@ -7402,6 +7446,8 @@ "value": { "AZURE_BLOB_ACCOUNT_NAME": "[parameters('storageAccountName')]", "AZURE_BLOB_CONTAINER_NAME": "[variables('blobContainerName')]", + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION": "[parameters('computerVisionVectorizeImageApiVersion')]", + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION": "[parameters('computerVisionVectorizeImageModelVersion')]", "AZURE_CONTENT_SAFETY_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName')), '2022-09-01').outputs.endpoint.value]", "AZURE_FORM_RECOGNIZER_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName')), '2022-09-01').outputs.endpoint.value]", "AZURE_OPENAI_MODEL": "[parameters('azureOpenAIModel')]", @@ -7413,6 +7459,8 @@ "AZURE_SEARCH_DATASOURCE_NAME": "[parameters('azureSearchDatasource')]", "AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", + "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", + "AZURE_COMPUTER_VISION_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, 
variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]", "LOGLEVEL": "[parameters('logLevel')]" @@ -7426,7 +7474,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "7212582717200024100" + "templateHash": "3188820983633786286" } }, "parameters": { @@ -7491,6 +7539,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionName": { + "type": "string", + "defaultValue": "" + }, "useKeyVault": { "type": "bool" }, @@ -7510,6 +7562,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionKeyName": { + "type": "string", + "defaultValue": "" + }, "contentSafetyKeyName": { "type": "string", "defaultValue": "" @@ -7597,7 +7653,7 @@ "value": "[parameters('dockerFullImageName')]" }, "appSettings": { - "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), '2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), 
parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1)))]" + "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), '2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 
'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1), 'AZURE_COMPUTER_VISION_KEY', if(parameters('useKeyVault'), parameters('computerVisionKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1)))]" } }, "template": { @@ -8541,6 +8597,7 @@ } }, "dependsOn": [ + "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision')]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('hostingPlanName'))]", @@ -8611,6 +8668,7 @@ "searchKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.SEARCH_KEY_NAME.value), createObject('value', ''))]", "contentSafetyKeyName": 
"[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.CONTENT_SAFETY_KEY_NAME.value), createObject('value', ''))]", "speechKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.SPEECH_KEY_NAME.value), createObject('value', ''))]", + "computerVisionName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.COMPUTER_VISION_KEY_NAME.value), createObject('value', ''))]", "useKeyVault": { "value": "[parameters('useKeyVault')]" }, @@ -8622,6 +8680,8 @@ "value": { "AZURE_BLOB_ACCOUNT_NAME": "[parameters('storageAccountName')]", "AZURE_BLOB_CONTAINER_NAME": "[variables('blobContainerName')]", + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION": "[parameters('computerVisionVectorizeImageApiVersion')]", + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION": "[parameters('computerVisionVectorizeImageModelVersion')]", "AZURE_CONTENT_SAFETY_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName')), '2022-09-01').outputs.endpoint.value]", "AZURE_FORM_RECOGNIZER_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName')), '2022-09-01').outputs.endpoint.value]", 
"AZURE_OPENAI_MODEL": "[parameters('azureOpenAIModel')]", @@ -8633,6 +8693,8 @@ "AZURE_SEARCH_DATASOURCE_NAME": "[parameters('azureSearchDatasource')]", "AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", + "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", + "AZURE_COMPUTER_VISION_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]", "LOGLEVEL": "[parameters('logLevel')]" @@ -8646,7 +8708,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "7212582717200024100" + "templateHash": "3188820983633786286" } }, "parameters": { @@ -8711,6 +8773,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionName": { + "type": "string", + "defaultValue": "" + }, "useKeyVault": { "type": "bool" }, @@ -8730,6 +8796,10 @@ "type": "string", "defaultValue": "" }, + "computerVisionKeyName": { + "type": "string", + "defaultValue": "" + }, "contentSafetyKeyName": { "type": "string", "defaultValue": "" @@ -8817,7 +8887,7 @@ "value": "[parameters('dockerFullImageName')]" }, "appSettings": { - "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', 
if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), '2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1)))]" + "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), 
'2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1), 'AZURE_COMPUTER_VISION_KEY', if(parameters('useKeyVault'), parameters('computerVisionKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1)))]" } }, "template": { @@ -9761,6 +9831,7 @@ } }, "dependsOn": [ + "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision')]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName'))]", "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('formRecognizerName'))]", 
"[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('hostingPlanName'))]", @@ -10738,6 +10809,14 @@ "type": "string", "value": "[if(parameters('useKeyVault'), reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.STORAGE_ACCOUNT_KEY_NAME.value, '')]" }, + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION": { + "type": "string", + "value": "[parameters('computerVisionVectorizeImageApiVersion')]" + }, + "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION": { + "type": "string", + "value": "[parameters('computerVisionVectorizeImageModelVersion')]" + }, "AZURE_CONTENT_SAFETY_ENDPOINT": { "type": "string", "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('contentSafetyName')), '2022-09-01').outputs.endpoint.value]" @@ -10814,6 +10893,10 @@ "type": "string", "value": "[if(parameters('useKeyVault'), reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.OPENAI_KEY_NAME.value, '')]" }, + "AZURE_COMPUTER_VISION_ENDPOINT": { + "type": "string", + "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value]" + }, "AZURE_RESOURCE_GROUP": { "type": "string", "value": "[variables('rgName')]" From 9c8d5aa191eb4d278e75b2636d68b76d164746fd Mon Sep 17 00:00:00 2001 From: Adam Dougal Date: Wed, 15 May 2024 13:48:34 +0100 Subject: [PATCH 09/20] ci: Add stale bot (#918) --- 
.github/workflows/stale-bot.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/stale-bot.yml diff --git a/.github/workflows/stale-bot.yml b/.github/workflows/stale-bot.yml new file mode 100644 index 000000000..e31059ab4 --- /dev/null +++ b/.github/workflows/stale-bot.yml @@ -0,0 +1,19 @@ +name: 'Close stale issues and PRs' +on: + schedule: + - cron: '30 1 * * *' + +permissions: + contents: write + issues: write + pull-requests: write + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v9 + with: + stale-issue-message: 'This issue is stale because it has been open 180 days with no activity. Remove stale label or comment or this will be closed in 30 days.' + days-before-stale: 180 + days-before-close: 30 From 8762f243957d0569d9063dd041e9e41e48e7d606 Mon Sep 17 00:00:00 2001 From: Arpit Gaur Date: Wed, 15 May 2024 15:41:11 +0100 Subject: [PATCH 10/20] fix: Keep the Admin.py as uppercase naming to allow the streamlit pick it as is. 
(#912) --- code/backend/{admin.py => Admin.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename code/backend/{admin.py => Admin.py} (100%) diff --git a/code/backend/admin.py b/code/backend/Admin.py similarity index 100% rename from code/backend/admin.py rename to code/backend/Admin.py From 31bc18ba14915032d4bf64bcb37b8ad86ae0b1c4 Mon Sep 17 00:00:00 2001 From: Chinedum Echeta <60179183+cecheta@users.noreply.github.com> Date: Wed, 15 May 2024 15:43:47 +0100 Subject: [PATCH 11/20] fix: Fix computer vision for deployments (#919) --- infra/app/function.bicep | 2 +- infra/app/storekeys.bicep | 6 ++--- infra/main.bicep | 14 +++++++----- infra/main.json | 47 +++++++++++++++++++++------------------ 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/infra/app/function.bicep b/infra/app/function.bicep index 399e54a79..3c69a3583 100644 --- a/infra/app/function.bicep +++ b/infra/app/function.bicep @@ -51,7 +51,7 @@ module function '../core/host/functions.bicep' = { AZURE_FORM_RECOGNIZER_KEY: useKeyVault ? formRecognizerKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', formRecognizerName), '2023-05-01').key1 AZURE_CONTENT_SAFETY_KEY: useKeyVault ? contentSafetyKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', contentSafetyName), '2023-05-01').key1 AZURE_SPEECH_SERVICE_KEY: useKeyVault ? speechKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', speechServiceName), '2023-05-01').key1 - AZURE_COMPUTER_VISION_KEY: useKeyVault ? computerVisionKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 + AZURE_COMPUTER_VISION_KEY: (useKeyVault || computerVisionName == '') ? 
computerVisionKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 }) } } diff --git a/infra/app/storekeys.bicep b/infra/app/storekeys.bicep index d4498025b..57084d9c1 100644 --- a/infra/app/storekeys.bicep +++ b/infra/app/storekeys.bicep @@ -63,11 +63,11 @@ resource speechKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { } } -resource computerVisionKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { +resource computerVisionKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (computerVisionName != '') { parent: keyVault name: computerVisionKeyName properties: { - value: listKeys(resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 + value: computerVisionName != '' ? listKeys(resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 : '' } } @@ -81,4 +81,4 @@ output SEARCH_KEY_NAME string = searchKeySecret.name output OPENAI_KEY_NAME string = openAIKeySecret.name output STORAGE_ACCOUNT_KEY_NAME string = storageAccountKeySecret.name output SPEECH_KEY_NAME string = speechKeySecret.name -output COMPUTER_VISION_KEY_NAME string = computerVisionKeySecret.name +output COMPUTER_VISION_KEY_NAME string = computerVisionName != '' ? computerVisionKeySecret.name : '' diff --git a/infra/main.bicep b/infra/main.bicep index fca9bb6b7..3a4c1cf3a 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -430,7 +430,7 @@ module storekeys './app/storekeys.bicep' = if (useKeyVault) { formRecognizerName: formrecognizer.outputs.name contentSafetyName: contentsafety.outputs.name speechServiceName: speechServiceName - computerVisionName: computerVision.outputs.name + computerVisionName: useAdvancedImageProcessing ? 
computerVision.outputs.name : '' rgName: rgName } } @@ -783,7 +783,7 @@ module function './app/function.bicep' = if (hostingModel == 'code') { formRecognizerName: formrecognizer.outputs.name contentSafetyName: contentsafety.outputs.name speechServiceName: speechService.outputs.name - computerVisionName: computerVision.outputs.name + computerVisionName: useAdvancedImageProcessing ? computerVision.outputs.name : '' clientKey: clientKey openAIKeyName: useKeyVault ? storekeys.outputs.OPENAI_KEY_NAME : '' storageAccountKeyName: useKeyVault ? storekeys.outputs.STORAGE_ACCOUNT_KEY_NAME : '' @@ -812,7 +812,7 @@ module function './app/function.bicep' = if (hostingModel == 'code') { AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing - AZURE_COMPUTER_VISION_ENDPOINT: computerVision.outputs.endpoint + AZURE_COMPUTER_VISION_ENDPOINT: useAdvancedImageProcessing ? computerVision.outputs.endpoint : '' DOCUMENT_PROCESSING_QUEUE_NAME: queueName ORCHESTRATION_STRATEGY: orchestrationStrategy LOGLEVEL: logLevel @@ -836,6 +836,7 @@ module function_docker './app/function.bicep' = if (hostingModel == 'container') formRecognizerName: formrecognizer.outputs.name contentSafetyName: contentsafety.outputs.name speechServiceName: speechService.outputs.name + computerVisionName: useAdvancedImageProcessing ? computerVision.outputs.name : '' clientKey: clientKey openAIKeyName: useKeyVault ? storekeys.outputs.OPENAI_KEY_NAME : '' storageAccountKeyName: useKeyVault ? storekeys.outputs.STORAGE_ACCOUNT_KEY_NAME : '' @@ -843,7 +844,7 @@ module function_docker './app/function.bicep' = if (hostingModel == 'container') searchKeyName: useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : '' contentSafetyKeyName: useKeyVault ? storekeys.outputs.CONTENT_SAFETY_KEY_NAME : '' speechKeyName: useKeyVault ? storekeys.outputs.SPEECH_KEY_NAME : '' - computerVisionName: useKeyVault ? 
storekeys.outputs.COMPUTER_VISION_KEY_NAME : '' + computerVisionKeyName: useKeyVault ? storekeys.outputs.COMPUTER_VISION_KEY_NAME : '' useKeyVault: useKeyVault keyVaultName: useKeyVault || authType == 'rbac' ? keyvault.outputs.name : '' authType: authType @@ -864,7 +865,7 @@ module function_docker './app/function.bicep' = if (hostingModel == 'container') AZURE_SEARCH_INDEXER_NAME: azureSearchIndexer AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION: azureSearchUseIntegratedVectorization USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing - AZURE_COMPUTER_VISION_ENDPOINT: computerVision.outputs.endpoint + AZURE_COMPUTER_VISION_ENDPOINT: useAdvancedImageProcessing ? computerVision.outputs.endpoint : '' DOCUMENT_PROCESSING_QUEUE_NAME: queueName ORCHESTRATION_STRATEGY: orchestrationStrategy LOGLEVEL: logLevel @@ -990,6 +991,8 @@ output AZURE_APP_SERVICE_HOSTING_MODEL string = hostingModel output AZURE_BLOB_CONTAINER_NAME string = blobContainerName output AZURE_BLOB_ACCOUNT_NAME string = storageAccountName output AZURE_BLOB_ACCOUNT_KEY string = useKeyVault ? storekeys.outputs.STORAGE_ACCOUNT_KEY_NAME : '' +output AZURE_COMPUTER_VISION_ENDPOINT string = useAdvancedImageProcessing ? computerVision.outputs.endpoint : '' +output AZURE_COMPUTER_VISION_KEY string = useKeyVault ? storekeys.outputs.COMPUTER_VISION_KEY_NAME : '' output AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION string = computerVisionVectorizeImageApiVersion output AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION string = computerVisionVectorizeImageModelVersion output AZURE_CONTENT_SAFETY_ENDPOINT string = contentsafety.outputs.endpoint @@ -1011,7 +1014,6 @@ output AZURE_OPENAI_RESOURCE string = azureOpenAIResourceName output AZURE_OPENAI_EMBEDDING_MODEL string = azureOpenAIEmbeddingModel output AZURE_OPENAI_MODEL string = azureOpenAIModel output AZURE_OPENAI_API_KEY string = useKeyVault ? 
storekeys.outputs.OPENAI_KEY_NAME : '' -output AZURE_COMPUTER_VISION_ENDPOINT string = computerVision.outputs.endpoint output AZURE_RESOURCE_GROUP string = rgName output AZURE_SEARCH_KEY string = useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : '' output AZURE_SEARCH_SERVICE string = search.outputs.endpoint diff --git a/infra/main.json b/infra/main.json index 915088a7c..fec8cca5a 100644 --- a/infra/main.json +++ b/infra/main.json @@ -5,7 +5,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "95501043309266990" + "templateHash": "13373198886203455254" } }, "parameters": { @@ -255,10 +255,11 @@ "defaultValue": "openai_function", "allowedValues": [ "openai_function", + "semantic_kernel", "langchain" ], "metadata": { - "description": "Orchestration strategy: openai_function or langchain str. If you use a old version of turbo (0301), plese select langchain" + "description": "Orchestration strategy: openai_function or semantic_kernel or langchain str. If you use a old version of turbo (0301), please select langchain" } }, "azureOpenAITemperature": { @@ -1452,9 +1453,7 @@ "speechServiceName": { "value": "[parameters('speechServiceName')]" }, - "computerVisionName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.name.value]" - }, + "computerVisionName": "[if(parameters('useAdvancedImageProcessing'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.name.value), createObject('value', ''))]", "rgName": { "value": "[variables('rgName')]" } @@ -1466,7 +1465,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "3769272141523051550" + "templateHash": 
"2723102221602621788" } }, "parameters": { @@ -1585,11 +1584,12 @@ } }, { + "condition": "[not(equals(parameters('computerVisionName'), ''))]", "type": "Microsoft.KeyVault/vaults/secrets", "apiVersion": "2022-07-01", "name": "[format('{0}/{1}', parameters('keyVaultName'), parameters('computerVisionKeyName'))]", "properties": { - "value": "[listKeys(resourceId(subscription().subscriptionId, parameters('rgName'), 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1]" + "value": "[if(not(equals(parameters('computerVisionName'), '')), listKeys(resourceId(subscription().subscriptionId, parameters('rgName'), 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1, '')]" } } ], @@ -1620,7 +1620,7 @@ }, "COMPUTER_VISION_KEY_NAME": { "type": "string", - "value": "[parameters('computerVisionKeyName')]" + "value": "[if(not(equals(parameters('computerVisionName'), '')), parameters('computerVisionKeyName'), '')]" } } } @@ -7422,9 +7422,7 @@ "speechServiceName": { "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('speechServiceName')), '2022-09-01').outputs.name.value]" }, - "computerVisionName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.name.value]" - }, + "computerVisionName": "[if(parameters('useAdvancedImageProcessing'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.name.value), createObject('value', ''))]", "clientKey": { "value": "[variables('clientKey')]" }, @@ -7460,7 +7458,7 @@ 
"AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", - "AZURE_COMPUTER_VISION_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value]", + "AZURE_COMPUTER_VISION_ENDPOINT": "[if(parameters('useAdvancedImageProcessing'), reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value, '')]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]", "LOGLEVEL": "[parameters('logLevel')]" @@ -7474,7 +7472,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "3188820983633786286" + "templateHash": "14690928021789276012" } }, "parameters": { @@ -7653,7 +7651,7 @@ "value": "[parameters('dockerFullImageName')]" }, "appSettings": { - "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), 
'2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1), 'AZURE_COMPUTER_VISION_KEY', if(parameters('useKeyVault'), parameters('computerVisionKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1)))]" + "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 
'Microsoft.Search/searchServices', parameters('azureAISearchName')), '2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1), 'AZURE_COMPUTER_VISION_KEY', if(or(parameters('useKeyVault'), equals(parameters('computerVisionName'), '')), parameters('computerVisionKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1)))]" } }, "template": { @@ -8659,6 +8657,7 @@ "speechServiceName": { "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('speechServiceName')), '2022-09-01').outputs.name.value]" }, + "computerVisionName": "[if(parameters('useAdvancedImageProcessing'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, 
variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.name.value), createObject('value', ''))]", "clientKey": { "value": "[variables('clientKey')]" }, @@ -8668,7 +8667,7 @@ "searchKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.SEARCH_KEY_NAME.value), createObject('value', ''))]", "contentSafetyKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.CONTENT_SAFETY_KEY_NAME.value), createObject('value', ''))]", "speechKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.SPEECH_KEY_NAME.value), createObject('value', ''))]", - "computerVisionName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.COMPUTER_VISION_KEY_NAME.value), createObject('value', ''))]", + "computerVisionKeyName": "[if(parameters('useKeyVault'), createObject('value', reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.COMPUTER_VISION_KEY_NAME.value), createObject('value', ''))]", "useKeyVault": { "value": "[parameters('useKeyVault')]" }, @@ -8694,7 +8693,7 @@ 
"AZURE_SEARCH_INDEXER_NAME": "[parameters('azureSearchIndexer')]", "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "[parameters('azureSearchUseIntegratedVectorization')]", "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]", - "AZURE_COMPUTER_VISION_ENDPOINT": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value]", + "AZURE_COMPUTER_VISION_ENDPOINT": "[if(parameters('useAdvancedImageProcessing'), reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value, '')]", "DOCUMENT_PROCESSING_QUEUE_NAME": "[variables('queueName')]", "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]", "LOGLEVEL": "[parameters('logLevel')]" @@ -8708,7 +8707,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "3188820983633786286" + "templateHash": "14690928021789276012" } }, "parameters": { @@ -8887,7 +8886,7 @@ "value": "[parameters('dockerFullImageName')]" }, "appSettings": { - "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', parameters('azureAISearchName')), 
'2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1), 'AZURE_COMPUTER_VISION_KEY', if(parameters('useKeyVault'), parameters('computerVisionKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1)))]" + "value": "[union(parameters('appSettings'), createObject('WEBSITES_ENABLE_APP_SERVICE_STORAGE', 'false', 'AZURE_AUTH_TYPE', parameters('authType'), 'USE_KEY_VAULT', if(parameters('useKeyVault'), parameters('useKeyVault'), ''), 'AZURE_OPENAI_API_KEY', if(parameters('useKeyVault'), parameters('openAIKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('azureOpenAIName')), '2023-05-01').key1), 'AZURE_SEARCH_KEY', if(parameters('useKeyVault'), parameters('searchKeyName'), listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 
'Microsoft.Search/searchServices', parameters('azureAISearchName')), '2021-04-01-preview').primaryKey), 'AZURE_BLOB_ACCOUNT_KEY', if(parameters('useKeyVault'), parameters('storageAccountKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2021-09-01').keys[0].value), 'AZURE_FORM_RECOGNIZER_KEY', if(parameters('useKeyVault'), parameters('formRecognizerKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('formRecognizerName')), '2023-05-01').key1), 'AZURE_CONTENT_SAFETY_KEY', if(parameters('useKeyVault'), parameters('contentSafetyKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('contentSafetyName')), '2023-05-01').key1), 'AZURE_SPEECH_SERVICE_KEY', if(parameters('useKeyVault'), parameters('speechKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('speechServiceName')), '2023-05-01').key1), 'AZURE_COMPUTER_VISION_KEY', if(or(parameters('useKeyVault'), equals(parameters('computerVisionName'), '')), parameters('computerVisionKeyName'), listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', parameters('computerVisionName')), '2023-05-01').key1)))]" } }, "template": { @@ -10809,6 +10808,14 @@ "type": "string", "value": "[if(parameters('useKeyVault'), reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.STORAGE_ACCOUNT_KEY_NAME.value, '')]" }, + "AZURE_COMPUTER_VISION_ENDPOINT": { + "type": "string", + "value": "[if(parameters('useAdvancedImageProcessing'), 
reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value, '')]" + }, + "AZURE_COMPUTER_VISION_KEY": { + "type": "string", + "value": "[if(parameters('useKeyVault'), reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.COMPUTER_VISION_KEY_NAME.value, '')]" + }, "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION": { "type": "string", "value": "[parameters('computerVisionVectorizeImageApiVersion')]" @@ -10893,10 +10900,6 @@ "type": "string", "value": "[if(parameters('useKeyVault'), reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'storekeys'), '2022-09-01').outputs.OPENAI_KEY_NAME.value, '')]" }, - "AZURE_COMPUTER_VISION_ENDPOINT": { - "type": "string", - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', 'computerVision'), '2022-09-01').outputs.endpoint.value]" - }, "AZURE_RESOURCE_GROUP": { "type": "string", "value": "[variables('rgName')]" From 676578d48c7dc24fc8798e4e419f13132d09475a Mon Sep 17 00:00:00 2001 From: Arpit Gaur Date: Wed, 15 May 2024 16:33:10 +0100 Subject: [PATCH 12/20] refactor: Apply pep8 on leftover files from pr-merge (#908) --- ...tion_embedder.py => test_integrated_vectorization_embedder.py} | 0 ...mputerVisionClient.py => test_azure_computer_vision_client.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename code/tests/utilities/helpers/processors/{test_Integrated_vectorization_embedder.py => test_integrated_vectorization_embedder.py} (100%) rename 
code/tests/utilities/helpers/{test_AzureComputerVisionClient.py => test_azure_computer_vision_client.py} (100%) diff --git a/code/tests/utilities/helpers/processors/test_Integrated_vectorization_embedder.py b/code/tests/utilities/helpers/processors/test_integrated_vectorization_embedder.py similarity index 100% rename from code/tests/utilities/helpers/processors/test_Integrated_vectorization_embedder.py rename to code/tests/utilities/helpers/processors/test_integrated_vectorization_embedder.py diff --git a/code/tests/utilities/helpers/test_AzureComputerVisionClient.py b/code/tests/utilities/helpers/test_azure_computer_vision_client.py similarity index 100% rename from code/tests/utilities/helpers/test_AzureComputerVisionClient.py rename to code/tests/utilities/helpers/test_azure_computer_vision_client.py From e790daa26c1f9e9f2e89838c31fa539959a7b445 Mon Sep 17 00:00:00 2001 From: Chinedum Echeta <60179183+cecheta@users.noreply.github.com> Date: Wed, 15 May 2024 17:48:58 +0100 Subject: [PATCH 13/20] fix: Fix generate_arm_templates.sh script, reformat all Bicep files (#922) --- infra/app/adminweb.bicep | 80 +++++++++++++++++--- infra/app/function.bicep | 92 ++++++++++++++++++++--- infra/app/storekeys.bicep | 37 +++++++-- infra/app/web.bicep | 78 ++++++++++++++++--- infra/app/workbook.bicep | 36 +++++++-- infra/core/ai/cognitiveservices.bicep | 42 ++++++----- infra/core/host/appservice.bicep | 23 ++++-- infra/core/host/functions.bicep | 17 ++++- infra/core/security/keyvault-access.bicep | 8 +- infra/core/security/keyvault.bicep | 16 ++-- infra/core/security/registry-access.bicep | 5 +- infra/core/storage/storage-account.bicep | 31 ++++---- infra/main.bicep | 60 +++++++++------ scripts/generate_arm_templates.sh | 2 +- 14 files changed, 409 insertions(+), 118 deletions(-) diff --git a/infra/app/adminweb.bicep b/infra/app/adminweb.bicep index 6363eb3ee..8303892cc 100644 --- a/infra/app/adminweb.bicep +++ b/infra/app/adminweb.bicep @@ -38,20 +38,80 @@ module adminweb 
'../core/host/appservice.bicep' = { runtimeName: runtimeName runtimeVersion: runtimeVersion keyVaultName: keyVaultName - dockerFullImageName: dockerFullImageName + dockerFullImageName: dockerFullImageName scmDoBuildDuringDeployment: useDocker ? false : true applicationInsightsName: applicationInsightsName appServicePlanId: appServicePlanId appSettings: union(appSettings, { - AZURE_AUTH_TYPE: authType - USE_KEY_VAULT: useKeyVault ? useKeyVault : '' - AZURE_OPENAI_API_KEY: useKeyVault ? openAIKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', azureOpenAIName), '2023-05-01').key1 - AZURE_SEARCH_KEY: useKeyVault ? searchKeyName : listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', azureAISearchName), '2021-04-01-preview').primaryKey - AZURE_BLOB_ACCOUNT_KEY: useKeyVault ? storageAccountKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', storageAccountName), '2021-09-01').keys[0].value - AZURE_FORM_RECOGNIZER_KEY: useKeyVault ? formRecognizerKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', formRecognizerName), '2023-05-01').key1 - AZURE_CONTENT_SAFETY_KEY: useKeyVault ? contentSafetyKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', contentSafetyName), '2023-05-01').key1 - AZURE_SPEECH_SERVICE_KEY: useKeyVault ? speechKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', speechServiceName), '2023-05-01').key1 - }) + AZURE_AUTH_TYPE: authType + USE_KEY_VAULT: useKeyVault ? useKeyVault : '' + AZURE_OPENAI_API_KEY: useKeyVault + ? 
openAIKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + azureOpenAIName + ), + '2023-05-01' + ).key1 + AZURE_SEARCH_KEY: useKeyVault + ? searchKeyName + : listAdminKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.Search/searchServices', + azureAISearchName + ), + '2021-04-01-preview' + ).primaryKey + AZURE_BLOB_ACCOUNT_KEY: useKeyVault + ? storageAccountKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.Storage/storageAccounts', + storageAccountName + ), + '2021-09-01' + ).keys[0].value + AZURE_FORM_RECOGNIZER_KEY: useKeyVault + ? formRecognizerKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + formRecognizerName + ), + '2023-05-01' + ).key1 + AZURE_CONTENT_SAFETY_KEY: useKeyVault + ? contentSafetyKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + contentSafetyName + ), + '2023-05-01' + ).key1 + AZURE_SPEECH_SERVICE_KEY: useKeyVault + ? speechKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + speechServiceName + ), + '2023-05-01' + ).key1 + }) } } diff --git a/infra/app/function.bicep b/infra/app/function.bicep index 3c69a3583..9b08040ff 100644 --- a/infra/app/function.bicep +++ b/infra/app/function.bicep @@ -42,17 +42,87 @@ module function '../core/host/functions.bicep' = { runtimeVersion: runtimeVersion dockerFullImageName: dockerFullImageName appSettings: union(appSettings, { - WEBSITES_ENABLE_APP_SERVICE_STORAGE: 'false' - AZURE_AUTH_TYPE: authType - USE_KEY_VAULT: useKeyVault ? useKeyVault : '' - AZURE_OPENAI_API_KEY: useKeyVault ? 
openAIKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', azureOpenAIName), '2023-05-01').key1 - AZURE_SEARCH_KEY: useKeyVault ? searchKeyName : listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', azureAISearchName), '2021-04-01-preview').primaryKey - AZURE_BLOB_ACCOUNT_KEY: useKeyVault ? storageAccountKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', storageAccountName), '2021-09-01').keys[0].value - AZURE_FORM_RECOGNIZER_KEY: useKeyVault ? formRecognizerKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', formRecognizerName), '2023-05-01').key1 - AZURE_CONTENT_SAFETY_KEY: useKeyVault ? contentSafetyKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', contentSafetyName), '2023-05-01').key1 - AZURE_SPEECH_SERVICE_KEY: useKeyVault ? speechKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', speechServiceName), '2023-05-01').key1 - AZURE_COMPUTER_VISION_KEY: (useKeyVault || computerVisionName == '') ? computerVisionKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 - }) + WEBSITES_ENABLE_APP_SERVICE_STORAGE: 'false' + AZURE_AUTH_TYPE: authType + USE_KEY_VAULT: useKeyVault ? useKeyVault : '' + AZURE_OPENAI_API_KEY: useKeyVault + ? openAIKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + azureOpenAIName + ), + '2023-05-01' + ).key1 + AZURE_SEARCH_KEY: useKeyVault + ? 
searchKeyName + : listAdminKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.Search/searchServices', + azureAISearchName + ), + '2021-04-01-preview' + ).primaryKey + AZURE_BLOB_ACCOUNT_KEY: useKeyVault + ? storageAccountKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.Storage/storageAccounts', + storageAccountName + ), + '2021-09-01' + ).keys[0].value + AZURE_FORM_RECOGNIZER_KEY: useKeyVault + ? formRecognizerKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + formRecognizerName + ), + '2023-05-01' + ).key1 + AZURE_CONTENT_SAFETY_KEY: useKeyVault + ? contentSafetyKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + contentSafetyName + ), + '2023-05-01' + ).key1 + AZURE_SPEECH_SERVICE_KEY: useKeyVault + ? speechKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + speechServiceName + ), + '2023-05-01' + ).key1 + AZURE_COMPUTER_VISION_KEY: (useKeyVault || computerVisionName == '') + ? 
computerVisionKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + computerVisionName + ), + '2023-05-01' + ).key1 + }) } } diff --git a/infra/app/storekeys.bicep b/infra/app/storekeys.bicep index 57084d9c1..9118acba7 100644 --- a/infra/app/storekeys.bicep +++ b/infra/app/storekeys.bicep @@ -19,7 +19,10 @@ resource storageAccountKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' parent: keyVault name: storageAccountKeyName properties: { - value: listKeys(resourceId(subscription().subscriptionId, rgName, 'Microsoft.Storage/storageAccounts', storageAccountName), '2021-09-01').keys[0].value + value: listKeys( + resourceId(subscription().subscriptionId, rgName, 'Microsoft.Storage/storageAccounts', storageAccountName), + '2021-09-01' + ).keys[0].value } } @@ -27,7 +30,10 @@ resource openAIKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { parent: keyVault name: openAIKeyName properties: { - value: listKeys(resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', azureOpenAIName), '2023-05-01').key1 + value: listKeys( + resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', azureOpenAIName), + '2023-05-01' + ).key1 } } @@ -35,7 +41,10 @@ resource searchKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { parent: keyVault name: searchKeyName properties: { - value: listAdminKeys(resourceId(subscription().subscriptionId, rgName, 'Microsoft.Search/searchServices', azureAISearchName), '2021-04-01-preview').primaryKey + value: listAdminKeys( + resourceId(subscription().subscriptionId, rgName, 'Microsoft.Search/searchServices', azureAISearchName), + '2021-04-01-preview' + ).primaryKey } } @@ -43,7 +52,10 @@ resource formRecognizerKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' parent: keyVault name: formRecognizerKeyName properties: { - value: listKeys(resourceId(subscription().subscriptionId, 
rgName, 'Microsoft.CognitiveServices/accounts', formRecognizerName), '2023-05-01').key1 + value: listKeys( + resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', formRecognizerName), + '2023-05-01' + ).key1 } } @@ -51,7 +63,10 @@ resource contentSafetyKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = parent: keyVault name: contentSafetyKeyName properties: { - value: listKeys(resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', contentSafetyName), '2023-05-01').key1 + value: listKeys( + resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', contentSafetyName), + '2023-05-01' + ).key1 } } @@ -59,7 +74,10 @@ resource speechKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = { parent: keyVault name: speechKeyName properties: { - value: listKeys(resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', speechServiceName), '2023-05-01').key1 + value: listKeys( + resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', speechServiceName), + '2023-05-01' + ).key1 } } @@ -67,7 +85,12 @@ resource computerVisionKeySecret 'Microsoft.KeyVault/vaults/secrets@2022-07-01' parent: keyVault name: computerVisionKeyName properties: { - value: computerVisionName != '' ? listKeys(resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', computerVisionName), '2023-05-01').key1 : '' + value: computerVisionName != '' + ? 
listKeys( + resourceId(subscription().subscriptionId, rgName, 'Microsoft.CognitiveServices/accounts', computerVisionName), + '2023-05-01' + ).key1 + : '' } } diff --git a/infra/app/web.bicep b/infra/app/web.bicep index 9abeb66a3..9bba279e2 100644 --- a/infra/app/web.bicep +++ b/infra/app/web.bicep @@ -39,15 +39,75 @@ module web '../core/host/appservice.bicep' = { applicationInsightsName: applicationInsightsName appServicePlanId: appServicePlanId appSettings: union(appSettings, { - AZURE_AUTH_TYPE: authType - USE_KEY_VAULT: useKeyVault ? useKeyVault : '' - AZURE_OPENAI_API_KEY: useKeyVault ? openAIKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', azureOpenAIName), '2023-05-01').key1 - AZURE_SEARCH_KEY: useKeyVault ? searchKeyName : listAdminKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', azureAISearchName), '2021-04-01-preview').primaryKey - AZURE_BLOB_ACCOUNT_KEY: useKeyVault ? storageAccountKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Storage/storageAccounts', storageAccountName), '2021-09-01').keys[0].value - AZURE_FORM_RECOGNIZER_KEY: useKeyVault ? formRecognizerKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', formRecognizerName), '2023-05-01').key1 - AZURE_CONTENT_SAFETY_KEY: useKeyVault ? contentSafetyKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', contentSafetyName), '2023-05-01').key1 - AZURE_SPEECH_SERVICE_KEY: useKeyVault ? speechKeyName : listKeys(resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.CognitiveServices/accounts', speechServiceName), '2023-05-01').key1 - }) + AZURE_AUTH_TYPE: authType + USE_KEY_VAULT: useKeyVault ? useKeyVault : '' + AZURE_OPENAI_API_KEY: useKeyVault + ? 
openAIKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + azureOpenAIName + ), + '2023-05-01' + ).key1 + AZURE_SEARCH_KEY: useKeyVault + ? searchKeyName + : listAdminKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.Search/searchServices', + azureAISearchName + ), + '2021-04-01-preview' + ).primaryKey + AZURE_BLOB_ACCOUNT_KEY: useKeyVault + ? storageAccountKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.Storage/storageAccounts', + storageAccountName + ), + '2021-09-01' + ).keys[0].value + AZURE_FORM_RECOGNIZER_KEY: useKeyVault + ? formRecognizerKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + formRecognizerName + ), + '2023-05-01' + ).key1 + AZURE_CONTENT_SAFETY_KEY: useKeyVault + ? contentSafetyKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + contentSafetyName + ), + '2023-05-01' + ).key1 + AZURE_SPEECH_SERVICE_KEY: useKeyVault + ? 
speechKeyName + : listKeys( + resourceId( + subscription().subscriptionId, + resourceGroup().name, + 'Microsoft.CognitiveServices/accounts', + speechServiceName + ), + '2023-05-01' + ).key1 + }) keyVaultName: keyVaultName runtimeName: runtimeName runtimeVersion: runtimeVersion diff --git a/infra/app/workbook.bicep b/infra/app/workbook.bicep index efbd5b98f..bd4483bcd 100644 --- a/infra/app/workbook.bicep +++ b/infra/app/workbook.bicep @@ -14,14 +14,38 @@ var wookbookContents = loadTextContent('../workbooks/workbook.json') var wookbookContentsSubReplaced = replace(wookbookContents, '{subscription-id}', subscription().id) var wookbookContentsRGReplaced = replace(wookbookContentsSubReplaced, '{resource-group}', resourceGroup().name) var wookbookContentsAppServicePlanReplaced = replace(wookbookContentsRGReplaced, '{app-service-plan}', hostingPlanName) -var wookbookContentsBackendAppServiceReplaced = replace(wookbookContentsAppServicePlanReplaced, '{backend-app-service}', functionName) -var wookbookContentsWebAppServiceReplaced = replace(wookbookContentsBackendAppServiceReplaced, '{web-app-service}', websiteName) -var wookbookContentsAdminAppServiceReplaced = replace(wookbookContentsWebAppServiceReplaced, '{admin-app-service}', adminWebsiteName) -var wookbookContentsEventGridReplaced = replace(wookbookContentsAdminAppServiceReplaced, '{event-grid}', eventGridSystemTopicName) -var wookbookContentsLogAnalyticsReplaced = replace(wookbookContentsEventGridReplaced, '{log-analytics}', logAnalyticsName) +var wookbookContentsBackendAppServiceReplaced = replace( + wookbookContentsAppServicePlanReplaced, + '{backend-app-service}', + functionName +) +var wookbookContentsWebAppServiceReplaced = replace( + wookbookContentsBackendAppServiceReplaced, + '{web-app-service}', + websiteName +) +var wookbookContentsAdminAppServiceReplaced = replace( + wookbookContentsWebAppServiceReplaced, + '{admin-app-service}', + adminWebsiteName +) +var wookbookContentsEventGridReplaced = replace( + 
wookbookContentsAdminAppServiceReplaced, + '{event-grid}', + eventGridSystemTopicName +) +var wookbookContentsLogAnalyticsReplaced = replace( + wookbookContentsEventGridReplaced, + '{log-analytics}', + logAnalyticsName +) var wookbookContentsOpenAIReplaced = replace(wookbookContentsLogAnalyticsReplaced, '{open-ai}', azureOpenAIResourceName) var wookbookContentsAISearchReplaced = replace(wookbookContentsOpenAIReplaced, '{ai-search}', azureAISearchName) -var wookbookContentsStorageAccountReplaced = replace(wookbookContentsAISearchReplaced, '{storage-account}', storageAccountName) +var wookbookContentsStorageAccountReplaced = replace( + wookbookContentsAISearchReplaced, + '{storage-account}', + storageAccountName +) module cwydsa_workbook '../core/monitor/workbook.bicep' = { name: workbookDisplayName diff --git a/infra/core/ai/cognitiveservices.bicep b/infra/core/ai/cognitiveservices.bicep index a691cbab7..2feb375b1 100644 --- a/infra/core/ai/cognitiveservices.bicep +++ b/infra/core/ai/cognitiveservices.bicep @@ -8,19 +8,21 @@ param deployments array = [] param kind string = 'OpenAI' param managedIdentity bool = false -@allowed([ 'Enabled', 'Disabled' ]) +@allowed(['Enabled', 'Disabled']) param publicNetworkAccess string = 'Enabled' param sku object = { name: 'S0' } param allowedIpRules array = [] -param networkAcls object = empty(allowedIpRules) ? { - defaultAction: 'Allow' -} : { - ipRules: allowedIpRules - defaultAction: 'Deny' -} +param networkAcls object = empty(allowedIpRules) + ? 
{ + defaultAction: 'Allow' + } + : { + ipRules: allowedIpRules + defaultAction: 'Deny' + } resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = { name: name @@ -39,18 +41,22 @@ resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = { } @batchSize(1) -resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deployments: { - parent: account - name: deployment.name - properties: { - model: deployment.model - raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null - } - sku: contains(deployment, 'sku') ? deployment.sku : { - name: 'Standard' - capacity: 20 +resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [ + for deployment in deployments: { + parent: account + name: deployment.name + properties: { + model: deployment.model + raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null + } + sku: contains(deployment, 'sku') + ? deployment.sku + : { + name: 'Standard' + capacity: 20 + } } -}] +] output endpoint string = account.properties.endpoint output identityPrincipalId string = managedIdentity ? account.identity.principalId : '' diff --git a/infra/core/host/appservice.bicep b/infra/core/host/appservice.bicep index 54176522b..4d2dca50d 100644 --- a/infra/core/host/appservice.bicep +++ b/infra/core/host/appservice.bicep @@ -11,7 +11,14 @@ param managedIdentity bool = !empty(keyVaultName) // Runtime Properties @allowed([ - 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom' + 'dotnet' + 'dotnetcore' + 'dotnet-isolated' + 'node' + 'python' + 'java' + 'powershell' + 'custom' ]) param runtimeName string param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}' @@ -58,7 +65,7 @@ resource appService 'Microsoft.Web/sites@2022-03-01' = { functionAppScaleLimit: functionAppScaleLimit != -1 ? 
functionAppScaleLimit : null healthCheckPath: healthCheckPath cors: { - allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins) + allowedOrigins: union(['https://portal.azure.com', 'https://ms.portal.azure.com'], allowedOrigins) } } clientAffinityEnabled: clientAffinityEnabled @@ -88,7 +95,8 @@ module configAppSettings 'appservice-appsettings.bicep' = { name: '${name}-appSettings' params: { name: appService.name - appSettings: union(appSettings, + appSettings: union( + appSettings, { APPLICATIONINSIGHTS_ENABLED: string(!empty(applicationInsightsName)) AZURE_RESOURCE_GROUP: resourceGroup().name @@ -97,8 +105,11 @@ module configAppSettings 'appservice-appsettings.bicep' = { ENABLE_ORYX_BUILD: string(enableOryxBuild) }, runtimeName == 'python' && appCommandLine == '' ? { PYTHON_ENABLE_GUNICORN_MULTIWORKERS: 'true' } : {}, - !empty(applicationInsightsName) ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } : {}, - !empty(keyVaultName) ? { AZURE_KEY_VAULT_ENDPOINT: keyVault.properties.vaultUri } : {}) + !empty(applicationInsightsName) + ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } + : {}, + !empty(keyVaultName) ? 
{ AZURE_KEY_VAULT_ENDPOINT: keyVault.properties.vaultUri } : {} + ) } } @@ -112,7 +123,7 @@ resource configLogs 'Microsoft.Web/sites/config@2022-03-01' = { failedRequestsTracing: { enabled: true } httpLogs: { fileSystem: { enabled: true, retentionInDays: 1, retentionInMb: 35 } } } - dependsOn: [ configAppSettings ] + dependsOn: [configAppSettings] } resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = if (!(empty(keyVaultName))) { diff --git a/infra/core/host/functions.bicep b/infra/core/host/functions.bicep index bc427e14b..4f2033117 100644 --- a/infra/core/host/functions.bicep +++ b/infra/core/host/functions.bicep @@ -12,7 +12,14 @@ param storageAccountName string // Runtime Properties @allowed([ - 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom' + 'dotnet' + 'dotnetcore' + 'dotnet-isolated' + 'node' + 'python' + 'java' + 'powershell' + 'custom' ]) param runtimeName string param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}' @@ -20,7 +27,10 @@ param runtimeVersion string // Function Settings @allowed([ - '~4', '~3', '~2', '~1' + '~4' + '~3' + '~2' + '~1' ]) param extensionVersion string = '~4' @@ -54,7 +64,8 @@ module functions 'appservice.bicep' = { appCommandLine: useDocker ? '' : appCommandLine applicationInsightsName: applicationInsightsName appServicePlanId: appServicePlanId - appSettings: union(appSettings, + appSettings: union( + appSettings, { AzureWebJobsStorage: 'DefaultEndpointsProtocol=https;AccountName=${storage.name};AccountKey=${storage.listKeys().keys[0].value};EndpointSuffix=${environment().suffixes.storage}' FUNCTIONS_EXTENSION_VERSION: extensionVersion diff --git a/infra/core/security/keyvault-access.bicep b/infra/core/security/keyvault-access.bicep index 316775f21..713c06c4c 100644 --- a/infra/core/security/keyvault-access.bicep +++ b/infra/core/security/keyvault-access.bicep @@ -2,18 +2,20 @@ metadata description = 'Assigns an Azure Key Vault access policy.' 
param name string = 'add' param keyVaultName string -param permissions object = { secrets: [ 'get', 'list' ] } +param permissions object = { secrets: ['get', 'list'] } param principalId string resource keyVaultAccessPolicies 'Microsoft.KeyVault/vaults/accessPolicies@2022-07-01' = { parent: keyVault name: name properties: { - accessPolicies: [ { + accessPolicies: [ + { objectId: principalId tenantId: subscription().tenantId permissions: permissions - } ] + } + ] } } diff --git a/infra/core/security/keyvault.bicep b/infra/core/security/keyvault.bicep index 314a1db61..ca338aa21 100644 --- a/infra/core/security/keyvault.bicep +++ b/infra/core/security/keyvault.bicep @@ -12,13 +12,15 @@ resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' = { properties: { tenantId: subscription().tenantId sku: { family: 'A', name: 'standard' } - accessPolicies: !empty(principalId) ? [ - { - objectId: principalId - permissions: { secrets: [ 'get', 'list' ] } - tenantId: subscription().tenantId - } - ] : [] + accessPolicies: !empty(principalId) + ? 
[ + { + objectId: principalId + permissions: { secrets: ['get', 'list'] } + tenantId: subscription().tenantId + } + ] + : [] } } diff --git a/infra/core/security/registry-access.bicep b/infra/core/security/registry-access.bicep index 5335efabc..ef5d2ae16 100644 --- a/infra/core/security/registry-access.bicep +++ b/infra/core/security/registry-access.bicep @@ -2,7 +2,10 @@ metadata description = 'Assigns ACR Pull permissions to access an Azure Containe param containerRegistryName string param principalId string -var acrPullRole = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '7f951dda-4ed3-4680-a7ca-43fe172d538d') +var acrPullRole = subscriptionResourceId( + 'Microsoft.Authorization/roleDefinitions', + '7f951dda-4ed3-4680-a7ca-43fe172d538d' +) resource aksAcrPull 'Microsoft.Authorization/roleAssignments@2022-04-01' = { scope: containerRegistry // Use when specifying a scope that is different than the deployment scope diff --git a/infra/core/storage/storage-account.bicep b/infra/core/storage/storage-account.bicep index 694d21573..43b4ce2c5 100644 --- a/infra/core/storage/storage-account.bicep +++ b/infra/core/storage/storage-account.bicep @@ -6,7 +6,8 @@ param tags object = {} @allowed([ 'Cool' 'Hot' - 'Premium' ]) + 'Premium' +]) param accessTier string = 'Hot' param allowBlobPublicAccess bool = false param allowCrossTenantReplication bool = true @@ -14,7 +15,7 @@ param allowSharedKeyAccess bool = true param containers array = [] param defaultToOAuthAuthentication bool = false param deleteRetentionPolicy object = {} -@allowed([ 'AzureDnsZone', 'Standard' ]) +@allowed(['AzureDnsZone', 'Standard']) param dnsEndpointType string = 'Standard' param kind string = 'StorageV2' param minimumTlsVersion string = 'TLS1_2' @@ -24,7 +25,7 @@ param networkAcls object = { bypass: 'AzureServices' defaultAction: 'Allow' } -@allowed([ 'Enabled', 'Disabled' ]) +@allowed(['Enabled', 'Disabled']) param publicNetworkAccess string = 'Enabled' param sku object = { name: 
'Standard_LRS' } @@ -52,12 +53,14 @@ resource storage 'Microsoft.Storage/storageAccounts@2022-05-01' = { properties: { deleteRetentionPolicy: deleteRetentionPolicy } - resource container 'containers' = [for container in containers: { - name: container.name - properties: { - publicAccess: contains(container, 'publicAccess') ? container.publicAccess : 'None' + resource container 'containers' = [ + for container in containers: { + name: container.name + properties: { + publicAccess: contains(container, 'publicAccess') ? container.publicAccess : 'None' + } } - }] + ] } resource queueServices 'queueServices' = if (!empty(queues)) { @@ -67,12 +70,14 @@ resource storage 'Microsoft.Storage/storageAccounts@2022-05-01' = { corsRules: [] } } - resource queue 'queues' = [for queue in queues: { - name: queue.name - properties: { - metadata: {} + resource queue 'queues' = [ + for queue in queues: { + name: queue.name + properties: { + metadata: {} + } } - }] + ] } } diff --git a/infra/main.bicep b/infra/main.bicep index 3a4c1cf3a..a0c3b6597 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -168,7 +168,8 @@ param computerVisionName string = 'computer-vision-${resourceToken}' param computerVisionSkuName string = 'S1' @description('Location of Computer Vision Resource (if useAdvancedImageProcessing=true)') -@allowed([ // List taken from https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/how-to/image-retrieval?tabs=python#prerequisites +@allowed([ + // List taken from https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/how-to/image-retrieval?tabs=python#prerequisites 'eastus' 'westus' 'koreacentral' @@ -184,7 +185,7 @@ param computerVisionLocation string = useAdvancedImageProcessing ? 
location : '' param computerVisionVectorizeImageApiVersion string = '2024-02-01' @description('Azure Computer Vision Vectorize Image Model Version') -param computerVisionVectorizeImageModelVersion string ='2023-04-15' +param computerVisionVectorizeImageModelVersion string = '2023-04-15' @description('Azure AI Search Resource') param azureAISearchName string = 'search-${resourceToken}' @@ -318,20 +319,25 @@ var defaultOpenAiDeployments = [ } ] -var openAiDeployments = concat(defaultOpenAiDeployments, useAdvancedImageProcessing ? [ - { - name: azureOpenAIVisionModel - model: { - format: 'OpenAI' - name: azureOpenAIVisionModelName - version: azureOpenAIVisionModelVersion - } - sku: { - name: 'Standard' - capacity: azureOpenAIVisionModelCapacity - } - } - ] : []) +var openAiDeployments = concat( + defaultOpenAiDeployments, + useAdvancedImageProcessing + ? [ + { + name: azureOpenAIVisionModel + model: { + format: 'OpenAI' + name: azureOpenAIVisionModelName + version: azureOpenAIVisionModelVersion + } + sku: { + name: 'Standard' + capacity: azureOpenAIVisionModelCapacity + } + } + ] + : [] +) module openai 'core/ai/cognitiveservices.bicep' = { name: azureOpenAIResourceName @@ -757,7 +763,9 @@ module workbook './app/workbook.bicep' = { hostingPlanName: hostingplan.outputs.name functionName: hostingModel == 'container' ? function_docker.outputs.functionName : function.outputs.functionName websiteName: hostingModel == 'container' ? web_docker.outputs.FRONTEND_API_NAME : web.outputs.FRONTEND_API_NAME - adminWebsiteName: hostingModel == 'container' ? adminweb_docker.outputs.WEBSITE_ADMIN_NAME : adminweb.outputs.WEBSITE_ADMIN_NAME + adminWebsiteName: hostingModel == 'container' + ? 
adminweb_docker.outputs.WEBSITE_ADMIN_NAME + : adminweb.outputs.WEBSITE_ADMIN_NAME eventGridSystemTopicName: eventgrid.outputs.name logAnalyticsName: monitoring.outputs.logAnalyticsWorkspaceName azureOpenAIResourceName: openai.outputs.name @@ -916,10 +924,12 @@ module storage 'core/storage/storage-account.bicep' = { sku: { name: 'Standard_GRS' } - deleteRetentionPolicy: azureSearchUseIntegratedVectorization ? { - enabled: true - days: 7 - } : {} + deleteRetentionPolicy: azureSearchUseIntegratedVectorization + ? { + enabled: true + days: 7 + } + : {} containers: [ { name: blobContainerName @@ -1039,6 +1049,10 @@ output AZURE_TENANT_ID string = tenant().tenantId output DOCUMENT_PROCESSING_QUEUE_NAME string = queueName output ORCHESTRATION_STRATEGY string = orchestrationStrategy output USE_KEY_VAULT bool = useKeyVault -output FRONTEND_WEBSITE_NAME string = hostingModel == 'code' ? web.outputs.FRONTEND_API_URI : web_docker.outputs.FRONTEND_API_URI -output ADMIN_WEBSITE_NAME string = hostingModel == 'code' ? adminweb.outputs.WEBSITE_ADMIN_URI : adminweb_docker.outputs.WEBSITE_ADMIN_URI +output FRONTEND_WEBSITE_NAME string = hostingModel == 'code' + ? web.outputs.FRONTEND_API_URI + : web_docker.outputs.FRONTEND_API_URI +output ADMIN_WEBSITE_NAME string = hostingModel == 'code' + ? 
adminweb.outputs.WEBSITE_ADMIN_URI + : adminweb_docker.outputs.WEBSITE_ADMIN_URI output LOGLEVEL string = logLevel diff --git a/scripts/generate_arm_templates.sh b/scripts/generate_arm_templates.sh index bfb1851e3..005b6d262 100755 --- a/scripts/generate_arm_templates.sh +++ b/scripts/generate_arm_templates.sh @@ -13,7 +13,7 @@ for ARG in $@; do TEMPLATES+=(${ARG#-f=}) else # Otherwise, it is a file that has been edited - az bicep format --insert-final-newline -f $ARG & + az bicep format -f $ARG & FILES+=($ARG) fi done From 9940c91e45233bad0e667628ef1319fa20e0bfd2 Mon Sep 17 00:00:00 2001 From: Adam Dougal Date: Thu, 16 May 2024 08:21:01 +0100 Subject: [PATCH 14/20] feat: Store image embeddings in search index (#921) Co-authored-by: Arpit Gaur --- .../utilities/helpers/azure_search_helper.py | 12 + .../helpers/embedders/push_embedder.py | 56 ++++- code/tests/functional/app_config.py | 2 +- code/tests/functional/conftest.py | 16 ++ .../functional/tests/functions/conftest.py | 1 + .../test_advanced_image_processing.py | 221 ++++++++++++++++++ .../helpers/test_azure_search_helper.py | 34 +++ .../utilities/helpers/test_push_embedder.py | 74 +++++- 8 files changed, 398 insertions(+), 18 deletions(-) diff --git a/code/backend/batch/utilities/helpers/azure_search_helper.py b/code/backend/batch/utilities/helpers/azure_search_helper.py index e949293ef..c48360e71 100644 --- a/code/backend/batch/utilities/helpers/azure_search_helper.py +++ b/code/backend/batch/utilities/helpers/azure_search_helper.py @@ -121,6 +121,18 @@ def create_index(self): ), ] + if self.env_helper.USE_ADVANCED_IMAGE_PROCESSING: + logger.info("Adding image_vector field to index") + fields.append( + SearchField( + name="image_vector", + type=SearchFieldDataType.Collection(SearchFieldDataType.Single), + searchable=True, + vector_search_dimensions=1024, + vector_search_profile_name="myHnswProfile", + ), + ) + index = SearchIndex( name=self.env_helper.AZURE_SEARCH_INDEX, fields=fields, diff --git 
a/code/backend/batch/utilities/helpers/embedders/push_embedder.py b/code/backend/batch/utilities/helpers/embedders/push_embedder.py index e6001d7ce..7ab2ac29d 100644 --- a/code/backend/batch/utilities/helpers/embedders/push_embedder.py +++ b/code/backend/batch/utilities/helpers/embedders/push_embedder.py @@ -1,6 +1,8 @@ +import hashlib import json import logging from typing import List +from urllib.parse import urlparse from ...helpers.llm_helper import LLMHelper from ...helpers.env_helper import EnvHelper @@ -61,7 +63,10 @@ def __embed( source_url ) logger.info("Image vectors: " + str(image_vectors)) - # Coming soon, storing the image embeddings in Azure Search + + documents_to_upload.append( + self.__create_image_document(source_url, image_vectors) + ) else: documents: List[SourceDocument] = self.document_loading.load( source_url, embedding_config.loading @@ -71,15 +76,16 @@ def __embed( ) for document in documents: - documents_to_upload.append(self._convert_to_search_document(document)) + documents_to_upload.append(self.__convert_to_search_document(document)) - response = self.azure_search_helper.get_search_client().upload_documents( - documents_to_upload - ) - if not all([r.succeeded for r in response]): - raise Exception(response) + response = self.azure_search_helper.get_search_client().upload_documents( + documents_to_upload + ) + if not all([r.succeeded for r in response]): + logger.error("Failed to upload documents to search index") + raise Exception(response) - def _convert_to_search_document(self, document: SourceDocument): + def __convert_to_search_document(self, document: SourceDocument): embedded_content = self.llm_helper.generate_embeddings(document.content) metadata = { "id": document.id, @@ -100,3 +106,37 @@ def _convert_to_search_document(self, document: SourceDocument): "chunk": document.chunk, "offset": document.offset, } + + def __generate_document_id(self, source_url: str) -> str: + hash_key = 
hashlib.sha1(f"{source_url}_1".encode("utf-8")).hexdigest() + return f"doc_{hash_key}" + + def __create_image_document(self, source_url: str, image_vectors: List[float]): + parsed_url = urlparse(source_url) + + file_url = parsed_url.scheme + "://" + parsed_url.netloc + parsed_url.path + document_id = self.__generate_document_id(file_url) + filename = parsed_url.path + + sas_placeholder = ( + "_SAS_TOKEN_PLACEHOLDER_" + if parsed_url.netloc + and parsed_url.netloc.endswith(".blob.core.windows.net") + else "" + ) + + return { + "id": document_id, + "content": "", + "content_vector": [], + "image_vector": image_vectors, + "metadata": json.dumps( + { + "id": document_id, + "title": filename, + "source": file_url + sas_placeholder, + } + ), + "title": filename, + "source": file_url + sas_placeholder, + } diff --git a/code/tests/functional/app_config.py b/code/tests/functional/app_config.py index ae1569027..b1c841c14 100644 --- a/code/tests/functional/app_config.py +++ b/code/tests/functional/app_config.py @@ -70,7 +70,7 @@ class AppConfig: "ORCHESTRATION_STRATEGY": "openai_function", "AZURE_SPEECH_RECOGNIZER_LANGUAGES": "en-US,es-ES", "TIKTOKEN_CACHE_DIR": f"{os.path.dirname(os.path.realpath(__file__))}/resources", - "USE_ADVANCED_IMAGE_PROCESSING": "True", + "USE_ADVANCED_IMAGE_PROCESSING": "False", "USE_KEY_VAULT": "False", # These values are set directly within EnvHelper, adding them here ensures # that they are removed from the environment when remove_from_environment() runs diff --git a/code/tests/functional/conftest.py b/code/tests/functional/conftest.py index 970a7372d..6e5e6408f 100644 --- a/code/tests/functional/conftest.py +++ b/code/tests/functional/conftest.py @@ -207,6 +207,22 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig): COMPUTER_VISION_VECTORIZE_IMAGE_REQUEST_METHOD, ).respond_with_json({"modelVersion": "2022-04-11", "vector": [1.0, 2.0, 3.0]}) + httpserver.expect_request( + 
f"/indexes('{app_config.get('AZURE_SEARCH_INDEX')}')/docs/search.index", + method="POST", + ).respond_with_json( + { + "value": [ + { + "key": "some-key", + "status": True, + "errorMessage": None, + "statusCode": 201, + } + ] + } + ) + yield httpserver.check() diff --git a/code/tests/functional/tests/functions/conftest.py b/code/tests/functional/tests/functions/conftest.py index d4102207f..8717b65b0 100644 --- a/code/tests/functional/tests/functions/conftest.py +++ b/code/tests/functional/tests/functions/conftest.py @@ -19,6 +19,7 @@ def app_config(make_httpserver, ca): "AZURE_SPEECH_REGION_ENDPOINT": f"https://localhost:{make_httpserver.port}/", "AZURE_STORAGE_ACCOUNT_ENDPOINT": f"https://localhost:{make_httpserver.port}/", "AZURE_COMPUTER_VISION_ENDPOINT": f"https://localhost:{make_httpserver.port}/", + "USE_ADVANCED_IMAGE_PROCESSING": "True", "SSL_CERT_FILE": ca_temp_path, "CURL_CA_BUNDLE": ca_temp_path, } diff --git a/code/tests/functional/tests/functions/test_advanced_image_processing.py b/code/tests/functional/tests/functions/test_advanced_image_processing.py index 89867ce40..300ec4a7e 100644 --- a/code/tests/functional/tests/functions/test_advanced_image_processing.py +++ b/code/tests/functional/tests/functions/test_advanced_image_processing.py @@ -1,3 +1,4 @@ +import hashlib import json import os import sys @@ -141,3 +142,223 @@ def test_metadata_is_updated_after_processing( times=1, ), ) + + +def test_makes_correct_call_to_list_search_indexes( + message: QueueMessage, httpserver: HTTPServer, app_config: AppConfig +): + # when + batch_push_results.build().get_user_function()(message) + + # then + verify_request_made( + mock_httpserver=httpserver, + request_matcher=RequestMatcher( + path="/indexes", + method="GET", + headers={ + "Accept": "application/json;odata.metadata=minimal", + "Api-Key": app_config.get("AZURE_SEARCH_KEY"), + }, + query_string="api-version=2023-10-01-Preview", + times=1, + ), + ) + + +def 
test_makes_correct_call_to_create_documents_search_index( + message: QueueMessage, httpserver: HTTPServer, app_config: AppConfig +): + # when + batch_push_results.build().get_user_function()(message) + + # then + verify_request_made( + mock_httpserver=httpserver, + request_matcher=RequestMatcher( + path="/indexes", + method="POST", + headers={ + "Accept": "application/json;odata.metadata=minimal", + "Api-Key": app_config.get("AZURE_SEARCH_KEY"), + }, + query_string="api-version=2023-10-01-Preview", + json={ + "name": app_config.get("AZURE_SEARCH_INDEX"), + "fields": [ + { + "name": "id", + "type": "Edm.String", + "key": True, + "retrievable": True, + "searchable": False, + "filterable": True, + "sortable": False, + "facetable": False, + }, + { + "name": "content", + "type": "Edm.String", + "key": False, + "retrievable": True, + "searchable": True, + "filterable": False, + "sortable": False, + "facetable": False, + }, + { + "name": "content_vector", + "type": "Collection(Edm.Single)", + "searchable": True, + "dimensions": 2, + "vectorSearchProfile": "myHnswProfile", + }, + { + "name": "metadata", + "type": "Edm.String", + "key": False, + "retrievable": True, + "searchable": True, + "filterable": False, + "sortable": False, + "facetable": False, + }, + { + "name": "title", + "type": "Edm.String", + "key": False, + "retrievable": True, + "searchable": True, + "filterable": True, + "sortable": False, + "facetable": True, + }, + { + "name": "source", + "type": "Edm.String", + "key": False, + "retrievable": True, + "searchable": True, + "filterable": True, + "sortable": False, + "facetable": False, + }, + { + "name": "chunk", + "type": "Edm.Int32", + "key": False, + "retrievable": True, + "searchable": False, + "filterable": True, + "sortable": False, + "facetable": False, + }, + { + "name": "offset", + "type": "Edm.Int32", + "key": False, + "retrievable": True, + "searchable": False, + "filterable": True, + "sortable": False, + "facetable": False, + }, + { + "name": 
"image_vector", + "type": "Collection(Edm.Single)", + "searchable": True, + "dimensions": 1024, + "vectorSearchProfile": "myHnswProfile", + }, + ], + "semantic": { + "configurations": [ + { + "name": app_config.get( + "AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG" + ), + "prioritizedFields": { + "prioritizedContentFields": [{"fieldName": "content"}] + }, + } + ] + }, + "vectorSearch": { + "profiles": [ + {"name": "myHnswProfile", "algorithm": "default"}, + { + "name": "myExhaustiveKnnProfile", + "algorithm": "default_exhaustive_knn", + }, + ], + "algorithms": [ + { + "name": "default", + "kind": "hnsw", + "hnswParameters": { + "m": 4, + "efConstruction": 400, + "efSearch": 500, + "metric": "cosine", + }, + }, + { + "name": "default_exhaustive_knn", + "kind": "exhaustiveKnn", + "exhaustiveKnnParameters": {"metric": "cosine"}, + }, + ], + }, + }, + times=1, + ), + ) + + +def test_makes_correct_call_to_store_documents_in_search_index( + message: QueueMessage, httpserver: HTTPServer, app_config: AppConfig +): + # when + batch_push_results.build().get_user_function()(message) + + # then + expected_file_path = f"{app_config.get('AZURE_BLOB_CONTAINER_NAME')}/{FILE_NAME}" + expected_source_url = ( + f"{app_config.get('AZURE_STORAGE_ACCOUNT_ENDPOINT')}{expected_file_path}" + ) + hash_key = hashlib.sha1(f"{expected_source_url}_1".encode("utf-8")).hexdigest() + expected_id = f"doc_{hash_key}" + verify_request_made( + mock_httpserver=httpserver, + request_matcher=RequestMatcher( + path=f"/indexes('{app_config.get('AZURE_SEARCH_INDEX')}')/docs/search.index", + method="POST", + headers={ + "Accept": "application/json;odata.metadata=none", + "Content-Type": "application/json", + "Api-Key": app_config.get("AZURE_SEARCH_KEY"), + }, + query_string="api-version=2023-10-01-Preview", + json={ + "value": [ + { + "id": expected_id, + "content": "", + "content_vector": [], + "image_vector": [1.0, 2.0, 3.0], + "metadata": json.dumps( + { + "id": expected_id, + "title": f"/{expected_file_path}", + 
"source": expected_source_url, + } + ), + "title": f"/{expected_file_path}", + "source": expected_source_url, + "@search.action": "upload", + } + ] + }, + times=1, + ), + ) diff --git a/code/tests/utilities/helpers/test_azure_search_helper.py b/code/tests/utilities/helpers/test_azure_search_helper.py index 7369965f2..11e808a19 100644 --- a/code/tests/utilities/helpers/test_azure_search_helper.py +++ b/code/tests/utilities/helpers/test_azure_search_helper.py @@ -28,6 +28,7 @@ AZURE_SEARCH_USE_SEMANTIC_SEARCH = False AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = "default" AZURE_SEARCH_CONVERSATIONS_LOG_INDEX = "mock-log-index" +USE_ADVANCED_IMAGE_PROCESSING = False @pytest.fixture(autouse=True) @@ -65,6 +66,7 @@ def env_helper_mock(): AZURE_SEARCH_CONVERSATIONS_LOG_INDEX ) + env_helper.USE_ADVANCED_IMAGE_PROCESSING = USE_ADVANCED_IMAGE_PROCESSING env_helper.is_auth_type_keys.return_value = True yield env_helper @@ -253,6 +255,38 @@ def test_creates_search_index_if_not_exists( ) +@patch("backend.batch.utilities.helpers.azure_search_helper.SearchClient") +@patch("backend.batch.utilities.helpers.azure_search_helper.SearchIndexClient") +def test_creates_search_index_with_image_embeddings_when_advanced_image_processing_enabled( + search_index_client_mock: MagicMock, + search_client_mock: MagicMock, + env_helper_mock: MagicMock, +): + # given + env_helper_mock.USE_ADVANCED_IMAGE_PROCESSING = True + search_index_client_mock.return_value.list_index_names.return_value = [ + "some-irrelevant-index" + ] + + expected_image_vector_field = SearchField( + name="image_vector", + type=SearchFieldDataType.Collection(SearchFieldDataType.Single), + searchable=True, + vector_search_dimensions=1024, + vector_search_profile_name="myHnswProfile", + ) + + # when + AzureSearchHelper().get_search_client() + + # then + search_index_client_mock.return_value.create_index.assert_called_once() + assert ( + expected_image_vector_field + in 
search_index_client_mock.return_value.create_index.call_args.args[0].fields + ) + + @patch("backend.batch.utilities.helpers.azure_search_helper.SearchClient") @patch("backend.batch.utilities.helpers.azure_search_helper.SearchIndexClient") def test_does_not_create_search_index_if_it_exists( diff --git a/code/tests/utilities/helpers/test_push_embedder.py b/code/tests/utilities/helpers/test_push_embedder.py index df2fc034c..48f5a7b0a 100644 --- a/code/tests/utilities/helpers/test_push_embedder.py +++ b/code/tests/utilities/helpers/test_push_embedder.py @@ -1,3 +1,4 @@ +import hashlib import json import pytest from unittest.mock import MagicMock, call, patch @@ -112,35 +113,90 @@ def azure_computer_vision_mock(): yield mock -def test_embed_file_advanced_image_processing_skips_document_processing( - azure_search_helper_mock, +def test_embed_file_advanced_image_processing_vectorizes_image( + azure_computer_vision_mock, ): # given push_embedder = PushEmbedder(MagicMock(), MagicMock()) + source_url = "http://localhost:8080/some-file-name.jpg" # when - push_embedder.embed_file("some-url", "some-file-name.jpg") + push_embedder.embed_file(source_url, "some-file-name.jpg") # then - azure_search_helper_mock.return_value.get_search_client.assert_not_called() + azure_computer_vision_mock.return_value.vectorize_image.assert_called_once_with( + source_url + ) -def test_embed_file_advanced_image_processing_vectorizes_image( +def test_embed_file_advanced_image_processing_stores_embeddings_in_search_index( azure_computer_vision_mock, + azure_search_helper_mock: MagicMock, ): # given push_embedder = PushEmbedder(MagicMock(), MagicMock()) - source_url = "http://localhost:8080/some-file-name.jpg" + storage_container = "some-container" + file_name = "some-file-name.jpg" + host_path = ( + f"http://localhost.blob.core.windows.net/{storage_container}/{file_name}" + ) + source_url = f"{host_path}?some-query=param" + image_embeddings = [1.0, 2.0, 3.0] + 
azure_computer_vision_mock.return_value.vectorize_image.return_value = ( + image_embeddings + ) # when push_embedder.embed_file(source_url, "some-file-name.jpg") # then - azure_computer_vision_mock.return_value.vectorize_image.assert_called_once_with( - source_url + hash_key = hashlib.sha1(f"{host_path}_1".encode("utf-8")).hexdigest() + expected_id = f"doc_{hash_key}" + + azure_search_helper_mock.return_value.get_search_client.return_value.upload_documents.assert_called_once_with( + [ + { + "id": expected_id, + "content": "", + "content_vector": [], + "image_vector": image_embeddings, + "metadata": json.dumps( + { + "id": expected_id, + "title": f"/{storage_container}/{file_name}", + "source": f"{host_path}_SAS_TOKEN_PLACEHOLDER_", + } + ), + "title": f"/{storage_container}/{file_name}", + "source": f"{host_path}_SAS_TOKEN_PLACEHOLDER_", + }, + ] ) +def test_embed_file_advanced_image_processing_raises_exception_on_failure( + azure_search_helper_mock, +): + # given + push_embedder = PushEmbedder(MagicMock(), MagicMock()) + + successful_indexing_result = MagicMock() + successful_indexing_result.succeeded = True + failed_indexing_result = MagicMock() + failed_indexing_result.succeeded = False + azure_search_helper_mock.return_value.get_search_client.return_value.upload_documents.return_value = [ + successful_indexing_result, + failed_indexing_result, + ] + + # when + then + with pytest.raises(Exception): + push_embedder.embed_file( + "some-url", + "some-file-name.jpg", + ) + + def test_embed_file_use_advanced_image_processing_does_not_vectorize_image_if_unsupported( azure_computer_vision_mock, mock_config_helper, azure_search_helper_mock ): @@ -217,7 +273,7 @@ def test_embed_file_generates_embeddings_for_documents(llm_helper_mock): def test_embed_file_stores_documents_in_search_index( document_chunking_mock, llm_helper_mock, - azure_search_helper_mock, + azure_search_helper_mock: MagicMock, ): # given push_embedder = PushEmbedder(MagicMock(), MagicMock()) From 
ada413d7153d57a2c2ece7ada996e19b8d8cda03 Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Thu, 16 May 2024 11:21:49 +0100 Subject: [PATCH 15/20] test: update test_batch_push_results following merge Co-authored-by: Arpit Gaur --- code/backend/batch/batch_push_results.py | 2 +- code/tests/test_batch_push_results.py | 98 +++++++++++++++++++++--- 2 files changed, 88 insertions(+), 12 deletions(-) diff --git a/code/backend/batch/batch_push_results.py b/code/backend/batch/batch_push_results.py index f1db00a60..fd98a874c 100644 --- a/code/backend/batch/batch_push_results.py +++ b/code/backend/batch/batch_push_results.py @@ -7,7 +7,7 @@ from utilities.helpers.azure_blob_storage_client import AzureBlobStorageClient from utilities.helpers.env_helper import EnvHelper from utilities.helpers.embedders.embedder_factory import EmbedderFactory -from utilities.search.Search import Search +from utilities.search.search import Search bp_batch_push_results = func.Blueprint() logger = logging.getLogger(__name__) diff --git a/code/tests/test_batch_push_results.py b/code/tests/test_batch_push_results.py index b7c39c267..5350d901d 100644 --- a/code/tests/test_batch_push_results.py +++ b/code/tests/test_batch_push_results.py @@ -1,3 +1,4 @@ +import json import sys import os import pytest @@ -15,17 +16,22 @@ @pytest.fixture(autouse=True) def get_processor_handler_mock(): - with patch("backend.batch.batch_push_results.EmbedderFactory.create") as mock: - processor_handler = mock.return_value - yield processor_handler + with patch( + "backend.batch.batch_push_results.EmbedderFactory.create" + ) as mock_create_embedder, patch( + "backend.batch.batch_push_results.Search.get_search_handler" + ) as mock_get_search_handler: + processor_handler_create = mock_create_embedder.return_value + processor_handler_get_search_handler = mock_get_search_handler.return_value + yield processor_handler_create, processor_handler_get_search_handler def test_get_file_name_from_message(): mock_queue_message = 
QueueMessage( body='{"message": "test message", "filename": "test_filename.md"}' ) - - file_name = _get_file_name_from_message(mock_queue_message) + message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + file_name = _get_file_name_from_message(message_body) assert file_name == "test_filename.md" @@ -34,25 +40,95 @@ def test_get_file_name_from_message_no_filename(): mock_queue_message = QueueMessage( body='{"data": { "url": "test/test/test_filename.md"} }' ) - - file_name = _get_file_name_from_message(mock_queue_message) + message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + file_name = _get_file_name_from_message(message_body) assert file_name == "test_filename.md" +def test_batch_push_results_with_unhandled_event_type(): + mock_queue_message = QueueMessage( + body='{"eventType": "Microsoft.Storage.BlobUpdated"}' + ) + + with pytest.raises(NotImplementedError): + batch_push_results.build().get_user_function()(mock_queue_message) + + +@patch("backend.batch.batch_push_results._process_document_created_event") +def test_batch_push_results_with_blob_created_event( + mock_process_document_created_event, +): + mock_queue_message = QueueMessage( + body='{"eventType": "Microsoft.Storage.BlobCreated", "filename": "test/test/test_filename.md"}' + ) + + batch_push_results.build().get_user_function()(mock_queue_message) + + expected_message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + mock_process_document_created_event.assert_called_once_with(expected_message_body) + + +@patch("backend.batch.batch_push_results._process_document_created_event") +def test_batch_push_results_with_no_event(mock_process_document_created_event): + mock_queue_message = QueueMessage( + body='{"data": { "url": "test/test/test_filename.md"} }' + ) + + batch_push_results.build().get_user_function()(mock_queue_message) + + expected_message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + 
mock_process_document_created_event.assert_called_once_with(expected_message_body) + + +@patch("backend.batch.batch_push_results._process_document_deleted_event") +def test_batch_push_results_with_blob_deleted_event( + mock_process_document_deleted_event, +): + mock_queue_message = QueueMessage( + body='{"eventType": "Microsoft.Storage.BlobDeleted", "filename": "test/test/test_filename.md"}' + ) + + batch_push_results.build().get_user_function()(mock_queue_message) + + expected_message_body = json.loads(mock_queue_message.get_body().decode("utf-8")) + mock_process_document_deleted_event.assert_called_once_with(expected_message_body) + + @patch("backend.batch.batch_push_results.EnvHelper") @patch("backend.batch.batch_push_results.AzureBlobStorageClient") -def test_batch_push_results( - mock_azure_blob_storage_client, mock_env_helper, get_processor_handler_mock +def test_batch_push_results_with_blob_created_event_uses_embedder( + mock_azure_blob_storage_client, + mock_env_helper, + get_processor_handler_mock, ): + mock_create_embedder, mock_get_search_handler = get_processor_handler_mock + mock_queue_message = QueueMessage( - body='{"message": "test message", "filename": "test/test/test_filename.md"}' + body='{"eventType": "Microsoft.Storage.BlobCreated", "filename": "test/test/test_filename.md"}' ) mock_blob_client_instance = mock_azure_blob_storage_client.return_value mock_blob_client_instance.get_blob_sas.return_value = "test_blob_sas" batch_push_results.build().get_user_function()(mock_queue_message) - get_processor_handler_mock.embed_file.assert_called_once_with( + mock_create_embedder.embed_file.assert_called_once_with( "test_blob_sas", "test/test/test_filename.md" ) + + +@patch("backend.batch.batch_push_results.EnvHelper") +def test_batch_push_results_with_blob_deleted_event_uses_search_to_delete_with_sas_appended( + mock_env_helper, + get_processor_handler_mock, +): + mock_create_embedder, mock_get_search_handler = get_processor_handler_mock + + 
mock_queue_message = QueueMessage( + body='{"eventType": "Microsoft.Storage.BlobDeleted", "data": { "url": "https://test.test/test/test_filename.pdf"}}' + ) + + batch_push_results.build().get_user_function()(mock_queue_message) + mock_get_search_handler.delete_by_source.assert_called_once_with( + "https://test.test/test/test_filename.pdf_SAS_TOKEN_PLACEHOLDER_" + ) From cf2589e8aec390c9b08cb6cf43dd564b6596885e Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Thu, 16 May 2024 11:36:01 +0100 Subject: [PATCH 16/20] refactor: move import order --- code/backend/batch/batch_push_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/backend/batch/batch_push_results.py b/code/backend/batch/batch_push_results.py index fd98a874c..f1e4a21dc 100644 --- a/code/backend/batch/batch_push_results.py +++ b/code/backend/batch/batch_push_results.py @@ -1,8 +1,8 @@ import os import logging import json -import azure.functions as func from urllib.parse import urlparse +import azure.functions as func from utilities.helpers.azure_blob_storage_client import AzureBlobStorageClient from utilities.helpers.env_helper import EnvHelper From 1adbd0a0abef658845f05ea96a5e1a09b0e436d2 Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Thu, 16 May 2024 12:18:47 +0100 Subject: [PATCH 17/20] chore: add pylint from selected interpreter --- .vscode/settings.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.vscode/settings.json b/.vscode/settings.json index 7c03e4d62..a75a82cf8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -22,4 +22,5 @@ "python.testing.cwd": "${workspaceFolder}/code", "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, + "pylint.path" : [ "${interpreter}", "-m", "pylint" ] } From fae07018b1c923bb82871a7a3abb34c1812b3ff6 Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Thu, 16 May 2024 13:15:55 +0100 Subject: [PATCH 18/20] refactor: remove `any` --- code/backend/batch/batch_push_results.py | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/code/backend/batch/batch_push_results.py b/code/backend/batch/batch_push_results.py index f1e4a21dc..4058b96ef 100644 --- a/code/backend/batch/batch_push_results.py +++ b/code/backend/batch/batch_push_results.py @@ -14,7 +14,7 @@ logger.setLevel(level=os.environ.get("LOGLEVEL", "INFO").upper()) -def _get_file_name_from_message(message_body: any) -> str: +def _get_file_name_from_message(message_body) -> str: return message_body.get( "filename", "/".join( @@ -44,7 +44,7 @@ def batch_push_results(msg: func.QueueMessage) -> None: raise NotImplementedError(f"Unknown event type received: {event_type}") -def _process_document_created_event(message_body: any) -> None: +def _process_document_created_event(message_body) -> None: env_helper: EnvHelper = EnvHelper() blob_client = AzureBlobStorageClient() @@ -55,7 +55,7 @@ def _process_document_created_event(message_body: any) -> None: embedder.embed_file(file_sas, file_name) -def _process_document_deleted_event(message_body: any) -> None: +def _process_document_deleted_event(message_body) -> None: env_helper: EnvHelper = EnvHelper() search_handler = Search.get_search_handler(env_helper) From 3ceaeb9a44641baa01d40a61d2c4f120a12059da Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Thu, 16 May 2024 13:24:19 +0100 Subject: [PATCH 19/20] chore: use in-project for poetry --- .devcontainer/postCreate.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.devcontainer/postCreate.sh b/.devcontainer/postCreate.sh index 6c333d58f..1d4073b25 100755 --- a/.devcontainer/postCreate.sh +++ b/.devcontainer/postCreate.sh @@ -6,6 +6,7 @@ pip install poetry # https://pypi.org/project/poetry-plugin-export/ pip install poetry-plugin-export +poetry config virtualenvs.in-project true poetry config warnings.export false poetry install --with dev From 66b3bf0d2335d0519b71b103ac34151fae16e57e Mon Sep 17 00:00:00 2001 From: Liam Moat Date: Thu, 16 May 2024 13:48:50 +0100 Subject: [PATCH 20/20] chore: remove 
in-project env --- .devcontainer/postCreate.sh | 1 - .vscode/settings.json | 1 - 2 files changed, 2 deletions(-) diff --git a/.devcontainer/postCreate.sh b/.devcontainer/postCreate.sh index 1d4073b25..6c333d58f 100755 --- a/.devcontainer/postCreate.sh +++ b/.devcontainer/postCreate.sh @@ -6,7 +6,6 @@ pip install poetry # https://pypi.org/project/poetry-plugin-export/ pip install poetry-plugin-export -poetry config virtualenvs.in-project true poetry config warnings.export false poetry install --with dev diff --git a/.vscode/settings.json b/.vscode/settings.json index a75a82cf8..4efc47061 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,7 +4,6 @@ "azureFunctions.scmDoBuildDuringDeployment": true, "azureFunctions.projectLanguage": "Python", "azureFunctions.projectRuntime": "~4", - "azureFunctions.pythonVenv": "${workspaceFolder}/.venv", "debug.internalConsoleOptions": "neverOpen", "azureFunctions.projectLanguageModel": 2, "files.insertFinalNewline": true,