-
Notifications
You must be signed in to change notification settings - Fork 168
adding example policies for tpm and circuit breaker #119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
|
||
<policies> | ||
<inbound> | ||
<base /> | ||
<!--Priority management based on subscriptions: | ||
Applies a different quota depending on which subscription made the call. | ||
Every branch below selects the subscription by its ID, the same value used to build the counter key. --> | ||
<choose> | ||
<when condition="@(context.Subscription.Id == "subscription1")"> | ||
<!-- Rate limit by TPM (tokens per minute) | ||
Each subscription has its own TPM counter ('subscription1TPM' in this case) that tracks total tokens used per request. | ||
If the response status is in the 200-399 range, the counter increases by the 'usage.total_tokens' value read from the response body. | ||
If the counter hits the 500 TPM limit within 60 seconds, the request will not be forwarded to the backend. | ||
Clients receive 429 as the response status code. | ||
The remaining and total quota are reported in the 'remainingTPM' and 'totalTPM' response headers. | ||
--> | ||
<rate-limit-by-key calls="500" renewal-period="60" counter-key="@(String.Concat(context.Subscription.Id,"TPM"))" increment-condition="@(context.Response.StatusCode >= 200 && context.Response.StatusCode < 400)" increment-count="@(context.Response.Body.As<JObject>(true).SelectToken("usage.total_tokens").ToObject<int>())" remaining-calls-header-name="remainingTPM" total-calls-header-name="totalTPM" /> | ||
</when> | ||
<when condition="@(context.Subscription.Id == "subscription2")"> | ||
<!-- Rate limit by TPM | ||
Same scheme as above with a lower limit of 200 tokens per 60 seconds. | ||
The subscription is matched on its ID (not its display name) so that the condition agrees with the | ||
first branch and with the counter key, which is also derived from the subscription ID ('subscription2TPM'). --> | ||
<rate-limit-by-key calls="200" renewal-period="60" counter-key="@(String.Concat(context.Subscription.Id,"TPM"))" increment-condition="@(context.Response.StatusCode >= 200 && context.Response.StatusCode < 400)" increment-count="@(context.Response.Body.As<JObject>(true).SelectToken("usage.total_tokens").ToObject<int>())" remaining-calls-header-name="remainingTPM" total-calls-header-name="totalTPM" /> | ||
</when> | ||
<otherwise> | ||
<!-- All other subscriptions: plain call-based limit of 5 calls per 60 seconds, | ||
counted per subscription ID (no token counting and no quota headers). --> | ||
<rate-limit-by-key calls="5" renewal-period="60" counter-key="@(context.Subscription.Id)" /> | ||
</otherwise> | ||
</choose> | ||
</inbound> | ||
<backend> | ||
<!--The backend section inherits the base behavior without additional policies.--> | ||
<base /> | ||
</backend> | ||
</policies> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
<policies> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we can remove this; a circuit breaker is built in nowadays - https://learn.microsoft.com/en-us/azure/api-management/backends?tabs=bicep#circuit-breaker-preview There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed the file and updated the readme as well. |
||
<inbound> | ||
<base /> | ||
<!-- Cache lookup with default state to allow requests: | ||
reads the "circuitBreakerState" cache entry into a context variable of the same name, | ||
falling back to "true" when no entry is found. --> | ||
<cache-lookup-value key="circuitBreakerState" variable-name="circuitBreakerState" default-value="true" /> | ||
<!-- Setting a default timeout (30, in seconds per the variable name). | ||
The value is reused below for the Retry-After header and for how long the tripped state is cached. --> | ||
<set-variable name="circuitBreakerTimeoutSeconds" value="30" /> | ||
<choose> | ||
<!-- Customize this for priority handling. | ||
This branch is taken when the looked-up state parses to false, i.e. the circuit is tripped. --> | ||
<when condition="@(bool.Parse((string)context.Variables["circuitBreakerState"]) == false)"> | ||
<!-- Return a response immediately if the circuit breaker is tripped: | ||
503 with a Retry-After header set to the timeout value. --> | ||
<return-response> | ||
<set-status code="503" reason="Service Unavailable. Circuit tripped!" /> | ||
<set-header name="Retry-After" exists-action="override"> | ||
<value>@((string)context.Variables["circuitBreakerTimeoutSeconds"])</value> | ||
</set-header> | ||
</return-response> | ||
</when> | ||
</choose> | ||
</inbound> | ||
<backend> | ||
<base /> | ||
</backend> | ||
<outbound> | ||
<base /> | ||
<!-- Reset the circuit breaker on a successful response | ||
(stores "true" under "circuitBreakerState" for 300 seconds). | ||
NOTE(review): this store carries no condition on the response status, so it appears to run for | ||
every response that reaches the outbound section, not only successful ones. Confirm that is intended. --> | ||
<cache-store-value key="circuitBreakerState" value="true" duration="300" /> | ||
</outbound> | ||
<on-error> | ||
<base /> | ||
<!-- Check for errors --> | ||
<!-- Customize this for specific errors/priority handling --> | ||
<choose> | ||
<when condition="@(context.Response.StatusCode >= 300)"> | ||
<!-- Trip the circuit breaker on an error: | ||
stores "false" under "circuitBreakerState" for circuitBreakerTimeoutSeconds seconds. | ||
NOTE(review): assumes context.Response is populated whenever on-error runs. TODO confirm. --> | ||
<cache-store-value key="circuitBreakerState" value="false" duration="@(int.Parse((string)context.Variables["circuitBreakerTimeoutSeconds"]))" /> | ||
<!-- Continue with default error handling --> | ||
</when> | ||
</choose> | ||
</on-error> | ||
</policies> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any particular reason why we don't use `set-variable` to define the allowed calls and then only define `rate-limit-by-key` once?