@@ -1343,6 +1343,60 @@ def k8s_resource(set, kind)
1343
1343
} )
1344
1344
end
1345
1345
1346
# Builds a kubernetes app service that sets page_on_critical and checks the
# generated PrometheusRule: its apiVersion, its metadata, the name of its
# first rule group, and the full StatusCritical rule (which is expected to
# carry a 'pagerduty' => 'true' label).
it 'creates a paging alert rule for critical status components' do
  factory = eval_stacks do
    stack "mystack" do
      app_service "x", :kubernetes => true do
        self.maintainers = [person('Testers')]
        self.description = 'Testing'
        self.alerts_channel = 'test'
        self.page_on_critical = true

        self.application = 'MyApplication'
        self.startup_alert_threshold = '1h'
      end
    end
    env "e1", :primary_site => 'space' do
      instantiate_stack "mystack"
    end
  end

  environment = factory.inventory.find_environment('e1')
  set = environment.definitions['mystack'].k8s_machinesets['x']
  prometheus_rule = k8s_resource(set, 'PrometheusRule')

  expected_labels = {
    'prometheus' => 'main',
    'role' => 'alert-rules',
    'app.kubernetes.io/managed-by' => 'stacks',
    'stack' => 'mystack',
    'machineset' => 'x',
    'group' => 'blue',
    'app.kubernetes.io/instance' => 'blue',
    'app.kubernetes.io/part-of' => 'x',
    'app.kubernetes.io/component' => 'app_service'
  }
  expected_metadata = {
    'labels' => expected_labels,
    'name' => 'x-blue-app',
    'namespace' => 'e1'
  }

  expect(prometheus_rule['apiVersion']).to eql('monitoring.coreos.com/v1')
  expect(prometheus_rule['metadata']).to eql(expected_metadata)

  # Both the group name and the rule lookup use the first group of the spec.
  expect(prometheus_rule['spec']['groups'].first['name']).to eql('stacks-alerts')
  alert_rules = prometheus_rule['spec']['groups'].first['rules']
  status_critical_rule = alert_rules.find { |rule| rule['alert'] == 'StatusCritical' }

  expected_status_page_url = "https://go.timgroup.com/insight/space/proxy/{{ $labels.namespace }}/{{ $labels.pod }}/info/status"
  expected_rule = {
    'alert' => 'StatusCritical',
    'expr' => 'sum(tucker_component_status{job="x-blue-app",status="critical"}) by (pod, namespace) > 0',
    'labels' => {
      'severity' => 'critical',
      'alertname' => 'x-blue-app CRITICAL',
      'alert_owner_channel' => 'test',
      'pagerduty' => 'true'
    },
    'annotations' => {
      'message' => '{{ $value }} components are critical on {{ $labels.namespace }}/{{ $labels.pod }}',
      'status_page_url' => expected_status_page_url
    }
  }
  expect(status_critical_rule).to eql(expected_rule)
end
1399
+
1346
1400
it 'creates an alert rule for pods stuck in a crash loop' do
1347
1401
factory = eval_stacks do
1348
1402
stack "mystack" do
0 commit comments