Get the error rate for an AWS Step Functions state machine through CloudWatch
·1 min
AWS Step Functions emits the following execution metrics for each state machine:
- ExecutionsSucceeded.
- ExecutionsAborted.
- ExecutionsFailed.
- ExecutionsTimedOut.
We can compute the error rate for a given state machine, one value per 5-minute period, with the following CloudWatch metric math query:
# Compute the per-period error rate of one Step Functions state machine.
# The four status metrics are fetched as 5-minute sums and hidden from the
# output (ReturnData: false); only the "errorRate" expression is returned.
# FILL(metric, 0) treats a period with no reported datapoint for a status as
# zero, so a status that simply did not occur does not drop that period from
# the result.
aws cloudwatch get-metric-data \
  --region us-east-1 \
  --start-time 2020-06-01T00:00:00Z \
  --end-time 2020-08-01T01:00:00Z \
  --scan-by TimestampAscending \
  --metric-data-queries '
[
  {
    "Id": "executionsAborted1",
    "MetricStat": {
      "Metric": {
        "Namespace": "AWS/States",
        "MetricName": "ExecutionsAborted",
        "Dimensions": [
          {
            "Name": "StateMachineArn",
            "Value": "<state-machine-ARN>"
          }
        ]
      },
      "Period": 300,
      "Stat": "Sum"
    },
    "ReturnData": false
  },
  {
    "Id": "executionsFailed1",
    "MetricStat": {
      "Metric": {
        "Namespace": "AWS/States",
        "MetricName": "ExecutionsFailed",
        "Dimensions": [
          {
            "Name": "StateMachineArn",
            "Value": "<state-machine-ARN>"
          }
        ]
      },
      "Period": 300,
      "Stat": "Sum"
    },
    "ReturnData": false
  },
  {
    "Id": "executionsSucceeded1",
    "MetricStat": {
      "Metric": {
        "Namespace": "AWS/States",
        "MetricName": "ExecutionsSucceeded",
        "Dimensions": [
          {
            "Name": "StateMachineArn",
            "Value": "<state-machine-ARN>"
          }
        ]
      },
      "Period": 300,
      "Stat": "Sum"
    },
    "ReturnData": false
  },
  {
    "Id": "executionsTimedOut1",
    "MetricStat": {
      "Metric": {
        "Namespace": "AWS/States",
        "MetricName": "ExecutionsTimedOut",
        "Dimensions": [
          {
            "Name": "StateMachineArn",
            "Value": "<state-machine-ARN>"
          }
        ]
      },
      "Period": 300,
      "Stat": "Sum"
    },
    "ReturnData": false
  },
  {
    "Id": "errorRate",
    "Expression": "1 - FILL(executionsSucceeded1, 0)/(FILL(executionsSucceeded1, 0) + FILL(executionsTimedOut1, 0) + FILL(executionsFailed1, 0) + FILL(executionsAborted1, 0))"
  }
]
'