feat: Set limit for advanced image processing images #978
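
This PR introduces an `ADVANCED_IMAGE_PROCESSING_MAX_IMAGES` setting (default `1`) that caps how many image URLs are passed to the vision model in a single request. The limit is wired through the environment helper, the question-answer tool, the Bicep infrastructure templates, the documentation, and the tests.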

Merged · 2 commits · May 28, 2024
1 change: 1 addition & 0 deletions .vscode/settings.json
@@ -21,4 +21,5 @@
"python.testing.cwd": "${workspaceFolder}/code",
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"pylint.cwd": "${workspaceFolder}/code",
}
10 changes: 8 additions & 2 deletions code/backend/batch/utilities/helpers/env_helper.py
@@ -48,7 +48,7 @@ def __load_config(self, **kwargs) -> None:
"AZURE_SEARCH_INDEX_IS_PRECHUNKED", ""
)
self.AZURE_SEARCH_FILTER = os.getenv("AZURE_SEARCH_FILTER", "")
self.AZURE_SEARCH_TOP_K = int(os.getenv("AZURE_SEARCH_TOP_K", "5"))
self.AZURE_SEARCH_TOP_K = self.get_env_var_int("AZURE_SEARCH_TOP_K", 5)
self.AZURE_SEARCH_ENABLE_IN_DOMAIN = (
os.getenv("AZURE_SEARCH_ENABLE_IN_DOMAIN", "true").lower() == "true"
)
@@ -114,6 +114,9 @@ def __load_config(self, **kwargs) -> None:
self.USE_ADVANCED_IMAGE_PROCESSING = self.get_env_var_bool(
"USE_ADVANCED_IMAGE_PROCESSING", "False"
)
self.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = self.get_env_var_int(
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1
)
self.AZURE_COMPUTER_VISION_ENDPOINT = os.getenv(
"AZURE_COMPUTER_VISION_ENDPOINT"
)
@@ -244,7 +247,10 @@ def get_env_var_bool(self, var_name: str, default: str = "True") -> bool:
def get_env_var_array(self, var_name: str, default: str = ""):
return os.getenv(var_name, default).split(",")

def get_env_var_float(self, var_name: str, default: int):
def get_env_var_int(self, var_name: str, default: int):
return int(os.getenv(var_name, default))

def get_env_var_float(self, var_name: str, default: float):
return float(os.getenv(var_name, default))

def is_auth_type_keys(self):
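
For context, a minimal standalone sketch of the helper pattern that `AZURE_SEARCH_TOP_K` and the new setting are converted to (semantics assumed from the diff above; `os.getenv` returns the default unchanged when the variable is unset, so `int()` accepts either the string value or the integer default):

```python
import os

def get_env_var_int(var_name: str, default: int) -> int:
    # os.getenv returns the variable's string value when set,
    # otherwise the (integer) default; int() handles both.
    return int(os.getenv(var_name, default))

os.environ["ADVANCED_IMAGE_PROCESSING_MAX_IMAGES"] = "3"
print(get_env_var_int("ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1))  # -> 3
print(get_env_var_int("SOME_UNSET_VARIABLE", 1))                   # -> 1
```
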
code/backend/batch/utilities/tools/question_answer_tool.py
@@ -186,7 +186,7 @@ def create_image_url_list(self, source_documents):
doc.source.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas)
for doc in source_documents
if doc.title is not None and doc.title.split(".")[-1] in image_types
]
][: self.env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES]

return image_urls

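The functional core of the PR is the slice appended to this comprehension: the filtered list of image URLs is truncated to the configured maximum before being handed to the vision model. A minimal sketch of the pattern (the tuples and the `max_images` constant are illustrative stand-ins for the repository's `SourceDocument` objects and env helper):

```python
image_types = {"jpg", "jpeg", "png"}
max_images = 1  # ADVANCED_IMAGE_PROCESSING_MAX_IMAGES

documents = [
    ("report.pdf", "https://store/report.pdf"),
    ("photo1.jpg", "https://store/photo1.jpg"),
    ("photo2.jpg", "https://store/photo2.jpg"),
]

# Filter to image documents by file extension, then cap the result
# with a slice so at most max_images URLs are returned.
image_urls = [
    source
    for title, source in documents
    if title is not None and title.split(".")[-1] in image_types
][:max_images]

print(image_urls)  # -> ['https://store/photo1.jpg']
```
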
1 change: 1 addition & 0 deletions code/tests/functional/app_config.py
@@ -74,6 +74,7 @@ class AppConfig:
"AZURE_SPEECH_RECOGNIZER_LANGUAGES": "en-US,es-ES",
"TIKTOKEN_CACHE_DIR": f"{os.path.dirname(os.path.realpath(__file__))}/resources",
"USE_ADVANCED_IMAGE_PROCESSING": "False",
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "1",
"USE_KEY_VAULT": "False",
# These values are set directly within EnvHelper, adding them here ensures
# that they are removed from the environment when remove_from_environment() runs
77 changes: 73 additions & 4 deletions code/tests/utilities/tools/test_question_answer_tool.py
@@ -42,6 +42,7 @@ def env_helper_mock():
env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION = False
env_helper.USE_ADVANCED_IMAGE_PROCESSING = False
env_helper.AZURE_OPENAI_VISION_MODEL = "mock vision model"
env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = 1

yield env_helper

@@ -83,7 +84,7 @@ def search_handler_mock():


@pytest.fixture(autouse=True)
def source_documents_mock():
def get_source_documents_mock():
with patch(
"backend.batch.utilities.tools.question_answer_tool.Search.get_source_documents"
) as mock:
@@ -106,11 +107,11 @@ def source_documents_mock():
),
]
mock.return_value = documents
yield documents
yield mock
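
The fixture rename above also changes what is yielded: tests now receive the `MagicMock` itself rather than the pre-built document list, so individual tests can override `return_value` (as the new test below does) while still asserting on calls. A small standalone illustration of the pattern, assuming nothing beyond `pytest` and `unittest.mock`:

```python
from unittest.mock import MagicMock

import pytest

@pytest.fixture
def get_source_documents_mock():
    mock = MagicMock()
    mock.return_value = ["default document"]
    # Yield the mock itself (not its return value) so each test can
    # assert on calls and, where needed, substitute its own documents.
    yield mock

def test_uses_default_documents(get_source_documents_mock):
    assert get_source_documents_mock() == ["default document"]

def test_overrides_documents(get_source_documents_mock):
    get_source_documents_mock.return_value = ["custom document"]
    assert get_source_documents_mock() == ["custom document"]
```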


def test_answer_question_returns_source_documents(
source_documents_mock: list[SourceDocument],
get_source_documents_mock: MagicMock,
):
# given
tool = QuestionAnswerTool()
@@ -121,7 +122,7 @@ def test_answer_question_returns_source_documents(
# then
assert len(answer.source_documents) == 2
assert isinstance(answer.source_documents[0], SourceDocument)
assert answer.source_documents == source_documents_mock
assert answer.source_documents == get_source_documents_mock.return_value


def test_answer_question_returns_answer():
@@ -350,3 +351,71 @@ def test_use_advanced_vision_processing(env_helper_mock, llm_helper_mock):
assert isinstance(answer, Answer)
assert answer.question == "mock question"
assert answer.answer == "mock content"


def test_limit_number_of_images_passed_to_llm(
get_source_documents_mock: MagicMock,
env_helper_mock: MagicMock,
llm_helper_mock: MagicMock,
):
# given
get_source_documents_mock.return_value = [
SourceDocument(
id="mock id",
content="mock content",
title="mock title",
source="mock source",
chunk=123,
offset=123,
page_number=123,
),
SourceDocument(
id="mock id 2",
content="mock content 2",
title="mock title 2.jpg",
source="mock source 2_SAS_TOKEN_PLACEHOLDER_",
chunk_id="mock chunk id 2",
),
SourceDocument(
id="mock id 3",
content="mock content 3",
title="mock title 3.jpg",
source="mock source 3_SAS_TOKEN_PLACEHOLDER_",
chunk_id="mock chunk id 3",
),
]
env_helper_mock.USE_ADVANCED_IMAGE_PROCESSING = True
tool = QuestionAnswerTool()

# when
tool.answer_question("mock question", [])

# then
llm_helper_mock.get_chat_completion.assert_called_once_with(
[
{"content": "mock answering system prompt", "role": "system"},
{
"content": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock example content"}}]}, Question: mock example user question',
"name": "example_user",
"role": "system",
},
{
"content": "mock example answer",
"name": "example_assistant",
"role": "system",
},
{"content": "mock azure openai system message", "role": "system"},
{
"content": [
{
"type": "text",
"text": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}},{"[doc3]":{"content":"mock content 3"}}]}, Question: mock question',
},
{"type": "image_url", "image_url": "mock source 2mock sas"},
],
"role": "user",
},
],
model="mock vision model",
temperature=0,
)
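
Note how the assertion demonstrates the limit: with `ADVANCED_IMAGE_PROCESSING_MAX_IMAGES` left at `1`, the text of all three source documents still reaches the prompt (`[doc1]` through `[doc3]`), but only the first image URL (`mock source 2mock sas`) is attached; the third document's image is dropped.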
7 changes: 7 additions & 0 deletions docs/advanced_image_processing.md
@@ -38,4 +38,11 @@ Once enabled, advanced image processing will be enabled for all supported image

![image](./images/enable_advanced_image_processing.png)

The `ADVANCED_IMAGE_PROCESSING_MAX_IMAGES` environment variable controls the maximum number of images passed to the GPT-4 vision model in a single request (default: `1`).
Each additional image consumes more tokens and increases the likelihood of throttled requests.

```bash
azd env set ADVANCED_IMAGE_PROCESSING_MAX_IMAGES 2
```
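
The value flows from the `advancedImageProcessingMaxImages` Bicep parameter into the web apps' `ADVANCED_IMAGE_PROCESSING_MAX_IMAGES` app setting, where `EnvHelper` reads it at startup (see the `env_helper.py` diff above and the `infra/main.bicep` diff below).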

Advanced image processing is only used in the `custom` conversation flow and not the `byod` flow, as Azure OpenAI On Your Data only supports Ada embeddings. It is currently not possible to use advanced image processing when integrated vectorization is enabled.
7 changes: 7 additions & 0 deletions infra/main.bicep
@@ -110,6 +110,9 @@ param azureOpenAIModelCapacity int = 30
@description('Enables the use of a vision LLM and Computer Vision for embedding images')
param useAdvancedImageProcessing bool = false

@description('The maximum number of images to pass to the vision model in a single request')
param advancedImageProcessingMaxImages int = 1

@description('Azure OpenAI Vision Model Deployment Name')
param azureOpenAIVisionModel string = 'gpt-4'

@@ -554,6 +557,7 @@ module web './app/web.bicep' = if (hostingModel == 'code') {
AZURE_SPEECH_SERVICE_REGION: location
AZURE_SPEECH_RECOGNIZER_LANGUAGES: recognizedLanguages
USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing
ADVANCED_IMAGE_PROCESSING_MAX_IMAGES: advancedImageProcessingMaxImages
ORCHESTRATION_STRATEGY: orchestrationStrategy
CONVERSATION_FLOW: conversationFlow
LOGLEVEL: logLevel
@@ -627,6 +631,7 @@ module web_docker './app/web.bicep' = if (hostingModel == 'container') {
AZURE_SPEECH_SERVICE_REGION: location
AZURE_SPEECH_RECOGNIZER_LANGUAGES: recognizedLanguages
USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing
ADVANCED_IMAGE_PROCESSING_MAX_IMAGES: advancedImageProcessingMaxImages
ORCHESTRATION_STRATEGY: orchestrationStrategy
CONVERSATION_FLOW: conversationFlow
LOGLEVEL: logLevel
@@ -1097,3 +1102,5 @@ output ADMIN_WEBSITE_NAME string = hostingModel == 'code'
: adminweb_docker.outputs.WEBSITE_ADMIN_URI
output LOGLEVEL string = logLevel
output CONVERSATION_FLOW string = conversationFlow
output USE_ADVANCED_IMAGE_PROCESSING bool = useAdvancedImageProcessing
output ADVANCED_IMAGE_PROCESSING_MAX_IMAGES int = advancedImageProcessingMaxImages
1 change: 1 addition & 0 deletions infra/main.bicepparam
@@ -26,6 +26,7 @@ param azureOpenAIModelName = readEnvironmentVariable('AZURE_OPENAI_MODEL_NAME',
param azureOpenAIModelVersion = readEnvironmentVariable('AZURE_OPENAI_MODEL_VERSION', '0613')
param azureOpenAIModelCapacity = int(readEnvironmentVariable('AZURE_OPENAI_MODEL_CAPACITY', '30'))
param useAdvancedImageProcessing = bool(readEnvironmentVariable('USE_ADVANCED_IMAGE_PROCESSING', 'false'))
param advancedImageProcessingMaxImages = int(readEnvironmentVariable('ADVANCED_IMAGE_PROCESSING_MAX_IMAGES', '1'))
param azureOpenAIVisionModel = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL', 'gpt-4')
param azureOpenAIVisionModelName = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_NAME', 'gpt-4')
param azureOpenAIVisionModelVersion = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_VERSION', 'vision-preview')
19 changes: 18 additions & 1 deletion infra/main.json
@@ -5,7 +5,7 @@
"_generator": {
"name": "bicep",
"version": "0.27.1.19265",
"templateHash": "6027201902589320671"
"templateHash": "10484197901623589764"
}
},
"parameters": {
@@ -229,6 +229,13 @@
"description": "Enables the use of a vision LLM and Computer Vision for embedding images"
}
},
"advancedImageProcessingMaxImages": {
"type": "int",
"defaultValue": 1,
"metadata": {
"description": "The maximum number of images to pass to the vision model in a single request"
}
},
"azureOpenAIVisionModel": {
"type": "string",
"defaultValue": "gpt-4",
@@ -2031,6 +2038,7 @@
"AZURE_SPEECH_SERVICE_REGION": "[parameters('location')]",
"AZURE_SPEECH_RECOGNIZER_LANGUAGES": "[parameters('recognizedLanguages')]",
"USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]",
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "[parameters('advancedImageProcessingMaxImages')]",
"ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]",
"CONVERSATION_FLOW": "[parameters('conversationFlow')]",
"LOGLEVEL": "[parameters('logLevel')]"
@@ -2984,6 +2992,7 @@
"AZURE_SPEECH_SERVICE_REGION": "[parameters('location')]",
"AZURE_SPEECH_RECOGNIZER_LANGUAGES": "[parameters('recognizedLanguages')]",
"USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]",
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "[parameters('advancedImageProcessingMaxImages')]",
"ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]",
"CONVERSATION_FLOW": "[parameters('conversationFlow')]",
"LOGLEVEL": "[parameters('logLevel')]"
@@ -11102,6 +11111,14 @@
"CONVERSATION_FLOW": {
"type": "string",
"value": "[parameters('conversationFlow')]"
},
"USE_ADVANCED_IMAGE_PROCESSING": {
"type": "bool",
"value": "[parameters('useAdvancedImageProcessing')]"
},
"ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": {
"type": "int",
"value": "[parameters('advancedImageProcessingMaxImages')]"
}
}
}