     _TYPE_MAP,
     DropCode,
     DropCodeType,
+    RetryCode,
+    RetryCodeType,
     _TRACE,
 )
 
@@ -354,4 +356,172 @@ def patched_transmit(self_exporter, envelopes):
                 drop_code_types.add(type(drop_code).__name__)
 
         # Additional assertion to verify aggregation works
-        multi_count_categories = [cat for cat, count in category_totals.items() if count > 1]
+        multi_count_categories = [cat for cat, count in category_totals.items() if count > 1]
+
+    def test_retry_items_count(self):
+        """Test retry item counting with both RetryCode enums and integer status codes."""
+        retried_items = 0
+
+        metrics = self.mock_options.metrics
+        metrics._counters.total_item_retry_count.clear()
+
+        exporter = AzureMonitorTraceExporter(connection_string=self.mock_options.connection_string)
+
+        def patched_transmit(self_exporter, envelopes):
+            self.mock_options.transmit_called[0] = True
+
+            for envelope in envelopes:
+                if not hasattr(envelope, "data") or not envelope.data:
+                    continue
+
+                envelope_name = "Microsoft.ApplicationInsights." + envelope.data.base_type
+                telemetry_type = _BASE_TYPE_MAP.get(envelope_name, _UNKNOWN)
+
+                should_retry = random.choice([True, False])
+                if should_retry:
+                    nonlocal retried_items
+
+                    retry_type = random.choice(["http_status", "client_timeout", "unknown"])
+
+                    if retry_type == "http_status":
+                        # HTTP status codes that would trigger retries
+                        status_codes = [429, 503, 500, 502, 504]
+                        status_code = random.choice(status_codes)
+
+                        failure_count = random.randint(1, 3)
+                        retried_items += failure_count
+
+                        metrics.count_retry_items(failure_count, telemetry_type, status_code, None)
+                    elif retry_type == "client_timeout":
+                        timeout_messages = [
+                            "Connection timed out after 30 seconds",
+                            "Request timed out after 60 seconds",
+                            "Operation timed out",
+                            "Socket timeout occurred"
+                        ]
+
+                        exception_message = random.choice(timeout_messages)
+
+                        # Simulate multiple retries for the same timeout type
+                        failure_count = random.randint(1, 4)
+                        retried_items += failure_count
+
+                        metrics.count_retry_items(failure_count, telemetry_type, RetryCode.CLIENT_TIMEOUT, exception_message)
+                    else:
+                        # Unknown retry reasons
+                        unknown_messages = [
+                            "Unknown network error",
+                            "Unexpected retry condition",
+                            "Network instability detected",
+                            "Connection reset by peer"
+                        ]
+
+                        exception_message = random.choice(unknown_messages)
+
+                        failure_count = random.randint(1, 3)
+                        retried_items += failure_count
+
+                        metrics.count_retry_items(failure_count, telemetry_type, RetryCode.CLIENT_EXCEPTION, exception_message)
+
+                    continue
+
+            return ExportResult.SUCCESS
+
+        exporter._transmit = types.MethodType(patched_transmit, exporter)
+
+        resource = Resource.create({"service.name": "retry-test", "service.instance.id": "test-instance"})
+        trace_provider = TracerProvider(resource=resource)
+
+        processor = SimpleSpanProcessor(exporter)
+        trace_provider.add_span_processor(processor)
+
+        tracer = trace_provider.get_tracer(__name__)
+
+        total_items = random.randint(15, 25)  # Increased to get more aggregation
+
+        for i in range(total_items):
+            span_type = random.choice(["client", "server"])
+
+            if span_type == "client":
+                # Client spans generate RemoteDependencyData
+                with tracer.start_as_current_span(
+                    name=f"dependency-{i}",
+                    kind=trace.SpanKind.CLIENT,
+                    attributes={
+                        "db.system": "mysql",
+                        "db.name": "test_db",
+                        "db.operation": "query",
+                        "net.peer.name": "test-db-server",
+                        "net.peer.port": 3306,
+                    }
+                ) as span:
+                    span.set_status(trace.Status(trace.StatusCode.OK))
+                    time.sleep(0.01)
+            else:
+                # Server spans generate RequestData
+                with tracer.start_as_current_span(
+                    name=f"GET /api/endpoint-{i}",
+                    kind=trace.SpanKind.SERVER,
+                    attributes={
+                        "http.method": "GET",
+                        "http.url": f"https://example.com/api/endpoint-{i}",
+                        "http.route": f"/api/endpoint-{i}",
+                        "http.status_code": 200,
+                        "http.scheme": "https",
+                        "http.host": "example.com",
+                    }
+                ) as span:
+                    span.set_status(trace.Status(trace.StatusCode.OK))
+                    time.sleep(0.01)
+
+        trace_provider.force_flush()
+
+        self.metrics_instance = metrics
+
+        self.assertTrue(self.mock_options.transmit_called[0], "Exporter _transmit method was not called")
+
+        # Enhanced counting and verification logic
+        actual_retried_count = 0
+        category_totals = {}
+        http_status_totals = {}
+        client_timeout_totals = {}
+        unknown_retry_totals = {}
+
+        for telemetry_type, retry_code_data in metrics._counters.total_item_retry_count.items():
+            for retry_code, reason_map in retry_code_data.items():
+                if isinstance(reason_map, dict):
+                    for reason, count in reason_map.items():
+                        actual_retried_count += count
+                        category_totals[reason] = category_totals.get(reason, 0) + count
+
+                        # Separate HTTP status codes from client exceptions
+                        if isinstance(retry_code, int):
+                            http_status_totals[reason] = http_status_totals.get(reason, 0) + count
+                        elif retry_code == RetryCode.CLIENT_TIMEOUT:
+                            client_timeout_totals[reason] = client_timeout_totals.get(reason, 0) + count
+                        elif retry_code == RetryCode.CLIENT_EXCEPTION:
+                            unknown_retry_totals[reason] = unknown_retry_totals.get(reason, 0) + count
+                else:
+                    actual_retried_count += reason_map
+
+        # Main assertion
+        self.assertEqual(
+            actual_retried_count,
+            retried_items,
+            f"Expected {retried_items} retried items, got {actual_retried_count}. "
+            f"HTTP Status retries: {len(http_status_totals)}, Client Timeout retries: {len(client_timeout_totals)}, "
+            f"Unknown retries: {len(unknown_retry_totals)}"
+        )
+
+        # Verify aggregation occurred
+        self.assertGreater(len(http_status_totals) + len(client_timeout_totals) + len(unknown_retry_totals), 0,
+            "At least one type of retry should have occurred")
+
+        # Verify that both integer and enum retry codes are being stored properly
+        retry_code_types = set()
+        for telemetry_type, retry_code_data in metrics._counters.total_item_retry_count.items():
+            for retry_code in retry_code_data.keys():
+                retry_code_types.add(type(retry_code).__name__)
+
+        # Additional assertion to verify aggregation works
+        multi_count_categories = [cat for cat, count in category_totals.items() if count > 1]
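Note on the verification block: the final loops assume that `_counters.total_item_retry_count` is a nested mapping of telemetry type -> retry code (either an integer HTTP status or a `RetryCode` member) -> reason string -> count, with a bare integer tolerated at the second level. The snippet below is a minimal, hypothetical sketch of that layout and of a `count_retry_items`-style aggregator, inferred only from the test's own iteration logic; it is not the exporter's actual metrics implementation, and the `RetryCode` stand-in, `Counters` alias, and helper signature here are illustrative assumptions.

```python
from enum import Enum
from typing import Dict, Union


class RetryCode(Enum):
    # Stand-in for the RetryCode enum imported by the test; values are illustrative.
    CLIENT_TIMEOUT = "client_timeout"
    CLIENT_EXCEPTION = "client_exception"


# Assumed layout: telemetry type -> retry code (int HTTP status or RetryCode) -> reason -> count
Counters = Dict[str, Dict[Union[int, RetryCode], Dict[str, int]]]


def count_retry_items(counters: Counters, count: int, telemetry_type: str,
                      retry_code, exception_message=None) -> None:
    # Aggregate into the nested mapping the same way the test's verification loop reads it back.
    reason = exception_message if exception_message else str(retry_code)
    per_code = counters.setdefault(telemetry_type, {}).setdefault(retry_code, {})
    per_code[reason] = per_code.get(reason, 0) + count


counters: Counters = {}
count_retry_items(counters, 2, "REQUEST", 503)  # HTTP-status retry, no exception message
count_retry_items(counters, 1, "DEPENDENCY", RetryCode.CLIENT_TIMEOUT, "Operation timed out")

# Mirrors the test's summation: walk type -> code -> reason and add up the counts.
total = sum(
    count
    for per_code in counters.values()
    for reason_map in per_code.values()
    for count in reason_map.values()
)
assert total == 3
```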