Skip to content

Commit e9bf60e

Browse files
bambriztvaron3simorenoh
authored
Fix for formatting empty, malformed, or non json payload in error messages (#40889)
* Accomdate diagnsotis format error for empty or malformed payload In some cases when we get an exception the payload is not in a json format or is empty in which case we accommodate for that when formatting the error message. * Add fix for async client with logging, and setting defaults that are not none for open telemetry * Update _cosmos_http_logging_policy.py * Update _cosmos_http_logging_policy.py * Update _cosmos_http_logging_policy.py * Update CHANGELOG.md * Update sdk/cosmos/azure-cosmos/CHANGELOG.md Co-authored-by: Simon Moreno <30335873+simorenoh@users.noreply.github.com> * Update _cosmos_http_logging_policy.py * Update test_cosmos_http_logging_policy_async.py fixes test that were broken for async tests of http logging policy * Update test_cosmos_http_logging_policy_async.py * Update test_cosmos_http_logging_policy.py * Update test_cosmos_http_logging_policy_async.py --------- Co-authored-by: tvaron3 <tomas.varon1802@gmail.com> Co-authored-by: Simon Moreno <30335873+simorenoh@users.noreply.github.com>
1 parent c1b4a76 commit e9bf60e

File tree

6 files changed

+310
-38
lines changed

6 files changed

+310
-38
lines changed

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#### Breaking Changes
88

99
#### Bugs Fixed
10+
* Fixed Diagnostics Error Log Formatting to handle error messages from non-CosmosHttpResponseExceptions. See [PR 40889](https://github.com/Azure/azure-sdk-for-python/pull/40889/files)
1011
* Fixed bug where `multiple_write_locations` option in client was not being honored. See [PR 40999](https://github.com/Azure/azure-sdk-for-python/pull/40999).
1112

1213
#### Other Changes

sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_http_logging_policy.py

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from ._location_cache import LocationCache
3838
from .http_constants import HttpHeaders
3939
from ._global_endpoint_manager import _GlobalEndpointManager
40-
from .documents import DatabaseAccount, ConnectionPolicy
40+
from .documents import DatabaseAccount
4141

4242
if TYPE_CHECKING:
4343
from azure.core.rest import HttpRequest, HttpResponse, AsyncHttpResponse
@@ -55,10 +55,16 @@
5555

5656

5757
def _format_error(payload: str) -> str:
58-
output = json.loads(payload)
59-
ret_str = "\n\t" + "Code: " + output['code'] + "\n"
60-
message = output["message"].replace("\r\n", "\n\t\t").replace(",", ",\n\t\t")
61-
ret_str += "\t" + message + "\n"
58+
try:
59+
output = json.loads(payload)
60+
ret_str = "\n\t" + "Code: " + output['code'] + "\n"
61+
message = output["message"].replace("\r\n", "\n\t\t").replace(",", ",\n\t\t")
62+
ret_str += "\t" + message + "\n"
63+
except (json.JSONDecodeError, KeyError):
64+
try:
65+
ret_str = "\t" + payload.replace("\r\n", "\n\t\t").replace(",", ",\n\t\t") + "\n"
66+
except AttributeError:
67+
ret_str = str(payload)
6268
return ret_str
6369

6470

@@ -109,15 +115,16 @@ def on_request(
109115
if self._enable_diagnostics_logging:
110116

111117
http_request = request.http_request
112-
request.context["start_time"] = time.time()
118+
if "start_time" not in request.context:
119+
request.context["start_time"] = time.time()
113120
options = request.context.options
114121
# Get logger in my context first (request has been retried)
115122
# then read from kwargs (pop if that's the case)
116123
# then use my instance logger
117124
logger = request.context.setdefault("logger", options.pop("logger", self.logger))
118125
# If filtered is applied, and we are not calling on request from on response, just return to avoid logging
119126
# the request again
120-
filter_applied = (logger.filters) or any(bool(h.filters) for h in logger.handlers)
127+
filter_applied = bool(logger.filters) or any(bool(h.filters) for h in logger.handlers)
121128
if filter_applied and 'logger_attributes' not in request.context:
122129
return
123130
operation_type = http_request.headers.get('x-ms-thinclient-proxy-operation-type')
@@ -236,7 +243,7 @@ def on_response( # pylint: disable=too-many-statements, too-many-branches, too-
236243
http_response = response.http_response
237244
headers = request.http_request.headers
238245
sub_status_str = http_response.headers.get("x-ms-substatus")
239-
sub_status_code: Optional[int] = int(sub_status_str) if sub_status_str else None
246+
sub_status_code: Optional[int] = int(sub_status_str) if sub_status_str else 0
240247
url_obj = request.http_request.url # type: ignore[attr-defined, union-attr]
241248
try:
242249
duration: Optional[float] = float(http_response.headers.get("x-ms-request-duration-ms")) # type: ignore[union-attr, arg-type] # pylint: disable=line-too-long
@@ -248,7 +255,7 @@ def on_response( # pylint: disable=too-many-statements, too-many-branches, too-
248255
"status_code": http_response.status_code, "sub_status_code": sub_status_code,
249256
"verb": request.http_request.method,
250257
"operation_type": headers.get('x-ms-thinclient-proxy-operation-type'),
251-
"url": str(url_obj), "database_name": None, "collection_name": None,
258+
"url": str(url_obj), "database_name": "", "collection_name": "",
252259
"resource_type": headers.get('x-ms-thinclient-proxy-resource-type'), "is_request": False} # type: ignore[assignment] # pylint: disable=line-too-long
253260

254261
if log_data["url"]:
@@ -316,10 +323,10 @@ def __get_client_settings(self) -> Optional[Dict[str, Any]]:
316323
client_excluded_regions = []
317324
client_account_read_regions = []
318325
client_account_write_regions = []
319-
320-
if self.__global_endpoint_manager:
321-
if self.__global_endpoint_manager.Client and self.__global_endpoint_manager.Client.connection_policy:
322-
connection_policy: ConnectionPolicy = self.__global_endpoint_manager.Client.connection_policy
326+
if self.__global_endpoint_manager and hasattr(self.__global_endpoint_manager, 'client'):
327+
gem_client = self.__global_endpoint_manager.client
328+
if gem_client and gem_client.connection_policy:
329+
connection_policy = gem_client.connection_policy
323330
client_preferred_regions = connection_policy.PreferredLocations
324331
client_excluded_regions = connection_policy.ExcludedLocations
325332

@@ -328,10 +335,10 @@ def __get_client_settings(self) -> Optional[Dict[str, Any]]:
328335
client_account_read_regions = location_cache.account_read_locations
329336
client_account_write_regions = location_cache.account_write_locations
330337

331-
return {"Client Preferred Regions": client_preferred_regions,
332-
"Client Excluded Regions": client_excluded_regions,
333-
"Client Account Read Regions": client_account_read_regions,
334-
"Client Account Write Regions": client_account_write_regions}
338+
return {"Preferred Regions": client_preferred_regions,
339+
"Excluded Regions": client_excluded_regions,
340+
"Account Read Regions": client_account_read_regions,
341+
"Account Write Regions": client_account_write_regions}
335342

336343
def __get_database_account_settings(self) -> Optional[DatabaseAccount]:
337344
if self.__global_endpoint_manager and hasattr(self.__global_endpoint_manager, '_database_account_cache'):

sdk/cosmos/azure-cosmos/azure/cosmos/_global_endpoint_manager.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class _GlobalEndpointManager(object): # pylint: disable=too-many-instance-attrib
4444
"""
4545

4646
def __init__(self, client):
47-
self.Client = client
47+
self.client = client
4848
self.EnableEndpointDiscovery = client.connection_policy.EnableEndpointDiscovery
4949
self.PreferredLocations = client.connection_policy.PreferredLocations
5050
self.DefaultEndpoint = client.url_connection
@@ -170,28 +170,28 @@ def _endpoints_health_check(self, **kwargs):
170170
if success_count >= 4:
171171
break
172172
# save current dba timeouts
173-
previous_dba_read_timeout = self.Client.connection_policy.DBAReadTimeout
174-
previous_dba_connection_timeout = self.Client.connection_policy.DBAConnectionTimeout
173+
previous_dba_read_timeout = self.client.connection_policy.DBAReadTimeout
174+
previous_dba_connection_timeout = self.client.connection_policy.DBAConnectionTimeout
175175
try:
176176
if (endpoint in
177177
self.location_cache.location_unavailability_info_by_endpoint):
178178
# if the endpoint is unavailable, we need to lower the timeouts to be more aggressive in the
179179
# health check. This helps reduce the time the health check is blocking all requests.
180-
self.Client.connection_policy.override_dba_timeouts(constants._Constants
180+
self.client.connection_policy.override_dba_timeouts(constants._Constants
181181
.UnavailableEndpointDBATimeouts,
182182
constants._Constants
183183
.UnavailableEndpointDBATimeouts)
184-
self.Client._GetDatabaseAccountCheck(endpoint, **kwargs)
184+
self.client._GetDatabaseAccountCheck(endpoint, **kwargs)
185185
else:
186-
self.Client._GetDatabaseAccountCheck(endpoint, **kwargs)
186+
self.client._GetDatabaseAccountCheck(endpoint, **kwargs)
187187
success_count += 1
188188
self.location_cache.mark_endpoint_available(endpoint)
189189
except (exceptions.CosmosHttpResponseError, AzureError):
190190
self.mark_endpoint_unavailable_for_read(endpoint, False)
191191
self.mark_endpoint_unavailable_for_write(endpoint, False)
192192
finally:
193193
# after the health check for that endpoint setting the timeouts back to their original values
194-
self.Client.connection_policy.override_dba_timeouts(previous_dba_read_timeout,
194+
self.client.connection_policy.override_dba_timeouts(previous_dba_read_timeout,
195195
previous_dba_connection_timeout)
196196
self.location_cache.update_location_cache()
197197

@@ -204,20 +204,20 @@ def _GetDatabaseAccountStub(self, endpoint, **kwargs):
204204
:rtype: ~azure.cosmos.DatabaseAccount
205205
"""
206206
if endpoint in self.location_cache.location_unavailability_info_by_endpoint:
207-
previous_dba_read_timeout = self.Client.connection_policy.DBAReadTimeout
208-
previous_dba_connection_timeout = self.Client.connection_policy.DBAConnectionTimeout
207+
previous_dba_read_timeout = self.client.connection_policy.DBAReadTimeout
208+
previous_dba_connection_timeout = self.client.connection_policy.DBAConnectionTimeout
209209
try:
210210
# if the endpoint is unavailable, we need to lower the timeouts to be more aggressive in the
211211
# health check. This helps reduce the time the health check is blocking all requests.
212-
self.Client.connection_policy.override_dba_timeouts(constants._Constants
212+
self.client.connection_policy.override_dba_timeouts(constants._Constants
213213
.UnavailableEndpointDBATimeouts,
214214
constants._Constants
215215
.UnavailableEndpointDBATimeouts)
216-
database_account = self.Client.GetDatabaseAccount(endpoint, **kwargs)
216+
database_account = self.client.GetDatabaseAccount(endpoint, **kwargs)
217217
finally:
218218
# after the health check for that endpoint setting the timeouts back to their original values
219-
self.Client.connection_policy.override_dba_timeouts(previous_dba_read_timeout,
219+
self.client.connection_policy.override_dba_timeouts(previous_dba_read_timeout,
220220
previous_dba_connection_timeout)
221221
else:
222-
database_account = self.Client.GetDatabaseAccount(endpoint, **kwargs)
222+
database_account = self.client.GetDatabaseAccount(endpoint, **kwargs)
223223
return database_account

sdk/cosmos/azure-cosmos/tests/test_cosmos_http_logging_policy.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def test_default_http_logging_policy(self):
132132
self.client_default.delete_database(database_id)
133133

134134
def test_cosmos_http_logging_policy(self):
135-
# Test if we can log into from reading a database
135+
# Test if we can log info from reading a database
136136
database_id = "database_test-" + str(uuid.uuid4())
137137
self.client_diagnostic.create_database(id=database_id)
138138
assert all(m.levelname == 'INFO' for m in self.mock_handler_diagnostic.messages)
@@ -185,8 +185,8 @@ def test_cosmos_http_logging_policy(self):
185185

186186
def test_filtered_diagnostics_logging_policy(self):
187187
# Test if we can log errors with the filtered diagnostics logger
188-
database_id = "database_test-" + str(uuid.uuid4())
189-
container_id = "container_test-" + str(uuid.uuid4())
188+
database_id = "database_test_" + str(uuid.uuid4())
189+
container_id = "diagnostics_container_test_" + str(uuid.uuid4())
190190
self.client_filtered_diagnostic.create_database(id=database_id)
191191
database = self.client_filtered_diagnostic.get_database_client(database_id)
192192
database.create_container(id=container_id, partition_key=PartitionKey(path="/pk"))
@@ -273,16 +273,16 @@ def test_client_settings(self):
273273
# Verify endpoint locations
274274
messages_split = mock_handler.messages[1].message.split("\n")
275275
for message in messages_split:
276-
if "Client Preferred Regions:" in message:
276+
if "Preferred Regions:" in message:
277277
locations = get_locations_list(message)
278278
assert all_locations == locations
279-
elif "Client Excluded Regions:" in message:
279+
elif "Excluded Regions:" in message:
280280
locations = get_locations_list(message)
281281
assert client_excluded_locations == locations
282-
elif "Client Account Read Regions:" in message:
282+
elif "Account Read Regions:" in message:
283283
locations = get_locations_list(message)
284284
assert all_locations == locations
285-
elif "Client Account Write Regions:" in message:
285+
elif "Account Write Regions:" in message:
286286
locations = get_locations_list(message)
287287
assert all_locations == locations
288288

0 commit comments

Comments
 (0)