@@ -670,20 +670,21 @@ Resources:
670
670
OKActions: []
671
671
672
672
<% JAVALAB_APP_TYPES . each do | name | -%>
673
-
674
- <%= name%> HighSevereErrorRateAlarm:
673
+ <% {
674
+ TenPercentSevereErrorRateAlarm : { Threshold : 10 , AlarmName : 'ten_percent_severe_error_rate' } ,
675
+ NinetyPercentSevereErrorRateAlarm : { Threshold : 90 , AlarmName : 'ninety_percent_severe_error_rate' } ,
676
+ } . each do |alarmTitle , config | -%>
677
+ <%= name%> <%= alarmTitle%> :
675
678
Type: AWS::CloudWatch::Alarm
676
679
Properties:
677
- AlarmName: !Sub "${SubDomainName}_<%= name . downcase%> _high_severe_error_rate"
678
- AlarmDescription: Send page if Javabuilder severe error rate exceeds 10% for 20
679
- minutes. Occasional spikes are expected, but a sustained high error rate
680
- is an indication of an outage.
681
- ActionsEnabled: true
682
- AlarmActions:
683
- - !If [SilenceAlertsCondition, !Ref AWS::NoValue, !Sub "arn:aws:sns:${AWS::Region}:${AWS::AccountId}:Javabuilder-high-error-rate"]
680
+ AlarmName: !Sub "${SubDomainName}_<%= name . downcase%> _<%= config [ :AlarmName ] %> "
681
+ AlarmDescription: Severe error rate in Javabuilder's <%= name%> build and run lambda (the core of
682
+ Javabuilder, which executes student <%= name%> code) exceeded <%= config [ :Threshold ] %> % for four
683
+ consecutive 5 minute periods.
684
+ ActionsEnabled: false
684
685
EvaluationPeriods: 4
685
686
DatapointsToAlarm: 4
686
- Threshold: 10
687
+ Threshold: <%= config [ :Threshold ] %>
687
688
ComparisonOperator: GreaterThanThreshold
688
689
TreatMissingData: notBreaching
689
690
Metrics:
@@ -713,20 +714,24 @@ Resources:
713
714
Value: !Ref BuildAndRunJava<%= name%> ProjectFunction
714
715
Period: 300
715
716
Stat: Sum
717
+ <% end%>
718
+
716
719
717
- <%= name%> HighErrorRateAlarm:
720
+ <% {
721
+ TwentyFivePercentErrorRateAlarm : { Threshold : 25 , AlarmName : 'twenty_five_percent_error_rate' } ,
722
+ NinetyPercentErrorRateAlarm : { Threshold : 90 , AlarmName : 'ninety_percent_error_rate' } ,
723
+ } . each do |alarmTitle , config | -%>
724
+ <%= name%> <%= alarmTitle%> :
718
725
Type: AWS::CloudWatch::Alarm
719
726
Properties:
720
- AlarmName: !Sub "${SubDomainName}_build_and_run_ <%= name . downcase%> _lambda_error_rate "
727
+ AlarmName: !Sub "${SubDomainName}_ <%= name . downcase%> _ <%= config [ :AlarmName ] %> "
721
728
AlarmDescription: Error rate in Javabuilder's <%= name%> build and run lambda (the core of
722
- Javabuilder, which executes student <%= name%> code) exceeded 10 % for four
729
+ Javabuilder, which executes student <%= name%> code) exceeded <%= config [ :Threshold ] %> % for four
723
730
consecutive 5 minute periods.
724
- ActionsEnabled: true
725
- AlarmActions:
726
- - !If [SilenceAlertsCondition, !Ref AWS::NoValue, !Sub "arn:aws:sns:${AWS::Region}:${AWS::AccountId}:javabuilder-build-and-run-lambda-error-rate"]
731
+ ActionsEnabled: false
727
732
EvaluationPeriods: 4
728
733
DatapointsToAlarm: 4
729
- Threshold: 25
734
+ Threshold: <%= config [ :Threshold ] %>
730
735
ComparisonOperator: GreaterThanThreshold
731
736
TreatMissingData: notBreaching
732
737
Metrics:
@@ -767,6 +772,7 @@ Resources:
767
772
Value: !Ref BuildAndRunJava<%= name%> ProjectFunction
768
773
Period: 300
769
774
Stat: TC(89000:)
775
+ <% end%>
770
776
771
777
<%= name%> SlowCleanupTimeAlarm:
772
778
Type: AWS::CloudWatch::Alarm
@@ -894,6 +900,111 @@ Resources:
894
900
Expression: ANOMALY_DETECTION_BAND(m1, 8)
895
901
ThresholdMetricId: ad1
896
902
903
+ # Per-app-type metric alarm on Lambda invocation volume. It is referenced by name
+ # (the "_minimum_usage" suffix) from the composite error-rate alarms' AlarmRule.
+ <%= name%> MinimumUsageAlarm:
904
+ Type: AWS::CloudWatch::Alarm
905
+ Properties:
906
+ AlarmName: !Sub "${SubDomainName}_<%= name . downcase%> _minimum_usage"
907
+ AlarmDescription: This alarm is to be used as part of a composite alarm, not by itself.
908
+ It triggers if the usage is above a minimum threshold, so we do not alarm on error
909
+ rates if we have very low usage.
910
+ # No notifications of its own: actions are disabled and no AlarmActions are listed.
+ ActionsEnabled: false
911
+ MetricName: Invocations
912
+ Namespace: AWS/Lambda
913
+ Statistic: Sum
914
+ Dimensions:
915
+ - Name: FunctionName
916
+ Value: !Ref BuildAndRunJava<%= name%> ProjectFunction
917
+ # Summed invocations per 300-second period must be >= 100 in 4 out of the
+ # last 4 evaluation periods for this alarm to enter ALARM.
+ Period: 300
918
+ EvaluationPeriods: 4
919
+ DatapointsToAlarm: 4
920
+ Threshold: 100
921
+ ComparisonOperator: GreaterThanOrEqualToThreshold
922
+ # Periods without datapoints are treated as not breaching the threshold.
+ TreatMissingData: notBreaching
923
+
924
+ # Composite alarm: in ALARM only while both the 10% severe-error-rate metric alarm
+ # and the minimum-usage alarm for this app type are in ALARM.
+ <%= name%> SevereErrorRateAlarm:
925
+ Type: AWS::CloudWatch::CompositeAlarm
926
+ # AlarmRule and ActionsSuppressor below refer to these alarms through name/ARN
+ # strings built with !Sub rather than !Ref, so the ordering is declared explicitly.
+ DependsOn:
927
+ - <%= name%> TenPercentSevereErrorRateAlarm
928
+ - <%= name%> MinimumUsageAlarm
929
+ - <%= name%> ElevatedSevereErrorRateAlarm
930
+ Properties:
931
+ AlarmName: !Sub "${SubDomainName}_<%= name . downcase%> _severe_error_rate"
932
+ AlarmDescription: Alarm if Javabuilder severe error rate exceeds 10% every 5 minutes for 20
933
+ minutes and there are at least 100 requests every 5 minutes.
934
+ Occasional spikes are expected, but a sustained elevated severe error rate is an indication of an issue.
935
+ Severe errors are generated and emitted by our code.
936
+ ActionsEnabled: true
937
+ # The SNS target is dropped (AWS::NoValue) when SilenceAlertsCondition is true.
+ AlarmActions:
938
+ - !If [SilenceAlertsCondition, !Ref AWS::NoValue, !Sub "arn:aws:sns:${AWS::Region}:${AWS::AccountId}:Javabuilder-high-error-rate"]
939
+ AlarmRule: !Sub "ALARM(${SubDomainName}_<%= name . downcase%> _ten_percent_severe_error_rate) AND
940
+ ALARM(${SubDomainName}_<%= name . downcase%> _minimum_usage)"
941
+ InsufficientDataActions: []
942
+ OKActions: []
943
+ # Suppressor is the "_elevated_severe_error_rate" composite alarm (90% threshold).
+ # NOTE(review): wait/extension periods are presumably seconds per the CloudWatch
+ # CompositeAlarm reference - confirm.
+ ActionsSuppressor: !Sub "arn:aws:cloudwatch:${AWS::Region}:${AWS::AccountId}:alarm:${SubDomainName}_<%= name . downcase%> _elevated_severe_error_rate"
944
+ ActionsSuppressorWaitPeriod: 120
945
+ ActionsSuppressorExtensionPeriod: 120
946
+
947
+ # Composite alarm: in ALARM only while both the 90% severe-error-rate metric alarm
+ # and the minimum-usage alarm for this app type are in ALARM.
+ <%= name%> ElevatedSevereErrorRateAlarm:
948
+ Type: AWS::CloudWatch::CompositeAlarm
949
+ # AlarmRule below refers to these alarms by name string rather than !Ref, so the
+ # ordering is declared explicitly.
+ DependsOn:
950
+ - <%= name%> NinetyPercentSevereErrorRateAlarm
951
+ - <%= name%> MinimumUsageAlarm
952
+ Properties:
953
+ AlarmName: !Sub "${SubDomainName}_<%= name . downcase%> _elevated_severe_error_rate"
954
+ AlarmDescription: Alarm if Javabuilder severe error rate exceeds 90% every 5 minutes for 20
955
+ minutes and there are at least 100 requests every 5 minutes.
956
+ Occasional spikes are expected, but a sustained high severe error rate is an indication of an outage.
957
+ Severe errors are generated and emitted by our code.
958
+ ActionsEnabled: true
959
+ # The SNS target is dropped (AWS::NoValue) when SilenceAlertsCondition is true.
+ AlarmActions:
960
+ - !If [SilenceAlertsCondition, !Ref AWS::NoValue, !Sub "arn:aws:sns:${AWS::Region}:${AWS::AccountId}:Javabuilder-high-error-rate"]
961
+ AlarmRule: !Sub "ALARM(${SubDomainName}_<%= name . downcase%> _ninety_percent_severe_error_rate) AND
962
+ ALARM(${SubDomainName}_<%= name . downcase%> _minimum_usage)"
963
+ InsufficientDataActions: []
964
+ OKActions: []
965
+
966
+ # Composite alarm: in ALARM only while both the 25% (non-severe) error-rate metric
+ # alarm and the minimum-usage alarm for this app type are in ALARM.
+ <%= name%> ErrorRateAlarm:
967
+ Type: AWS::CloudWatch::CompositeAlarm
968
+ # AlarmRule and ActionsSuppressor below refer to these alarms through name/ARN
+ # strings built with !Sub rather than !Ref, so the ordering is declared explicitly.
+ DependsOn:
969
+ - <%= name%> TwentyFivePercentErrorRateAlarm
970
+ - <%= name%> MinimumUsageAlarm
971
+ - <%= name%> ElevatedErrorRateAlarm
972
+ Properties:
973
+ AlarmName: !Sub "${SubDomainName}_<%= name . downcase%> _error_rate"
974
+ AlarmDescription: Alarm if Javabuilder error rate exceeds 25% every 5 minutes for 20
975
+ minutes and there are at least 100 requests every 5 minutes.
976
+ Occasional spikes are expected, but a sustained elevated error rate is an indication of an issue.
977
+ Errors are generated by the Lambda system.
978
+ ActionsEnabled: true
979
+ # The SNS target is dropped (AWS::NoValue) when SilenceAlertsCondition is true.
+ AlarmActions:
980
+ - !If [SilenceAlertsCondition, !Ref AWS::NoValue, !Sub "arn:aws:sns:${AWS::Region}:${AWS::AccountId}:Javabuilder-high-error-rate"]
981
+ AlarmRule: !Sub "ALARM(${SubDomainName}_<%= name . downcase%> _twenty_five_percent_error_rate) AND
982
+ ALARM(${SubDomainName}_<%= name . downcase%> _minimum_usage)"
983
+ InsufficientDataActions: []
984
+ OKActions: []
985
+ # Suppressor is the "_elevated_error_rate" composite alarm (90% threshold).
+ # NOTE(review): wait/extension periods are presumably seconds per the CloudWatch
+ # CompositeAlarm reference - confirm.
+ ActionsSuppressor: !Sub "arn:aws:cloudwatch:${AWS::Region}:${AWS::AccountId}:alarm:${SubDomainName}_<%= name . downcase%> _elevated_error_rate"
986
+ ActionsSuppressorWaitPeriod: 120
987
+ ActionsSuppressorExtensionPeriod: 120
988
+
989
+ # Composite alarm: in ALARM only while both the 90% (non-severe) error-rate metric
+ # alarm and the minimum-usage alarm for this app type are in ALARM.
+ <%= name%> ElevatedErrorRateAlarm:
990
+ Type: AWS::CloudWatch::CompositeAlarm
991
+ # AlarmRule below refers to these alarms by name string rather than !Ref, so the
+ # ordering is declared explicitly.
+ DependsOn:
992
+ - <%= name%> NinetyPercentErrorRateAlarm
993
+ - <%= name%> MinimumUsageAlarm
994
+ Properties:
995
+ AlarmName: !Sub "${SubDomainName}_<%= name . downcase%> _elevated_error_rate"
996
+ AlarmDescription: Alarm if Javabuilder error rate exceeds 90% every 5 minutes for 20
997
+ minutes and there are at least 100 requests every 5 minutes.
998
+ Occasional spikes are expected, but a sustained high error rate is an indication of an outage.
999
+ Errors are generated by the Lambda system.
1000
+ ActionsEnabled: true
1001
+ # The SNS target is dropped (AWS::NoValue) when SilenceAlertsCondition is true.
+ AlarmActions:
1002
+ - !If [SilenceAlertsCondition, !Ref AWS::NoValue, !Sub "arn:aws:sns:${AWS::Region}:${AWS::AccountId}:Javabuilder-high-error-rate"]
1003
+ AlarmRule: !Sub "ALARM(${SubDomainName}_<%= name . downcase%> _ninety_percent_error_rate) AND
1004
+ ALARM(${SubDomainName}_<%= name . downcase%> _minimum_usage)"
1005
+ InsufficientDataActions: []
1006
+ OKActions: []
1007
+
897
1008
<% end -%>
898
1009
899
1010
# We use shortened versions of names for partition keys (eg, user_id),
0 commit comments