|
34 | 34 | } +
|
35 | 35 | if $._config.showMultiCluster then {
|
// CPU overcommit, evaluated per cluster (grouped by the configured cluster label).
// Branch 1 (single-node clusters): requests exceed the total allocatable CPU.
//   The overcommit amount is the LEFT operand of `and`, so it is what the alert
//   exposes as $value; the node count is grouped by the cluster label so that
//   both operands of `and` carry the same label set and can match.
// Branch 2 (multi-node clusters): requests exceed what remains after removing
//   the largest node, guarded so it only applies when something remains.
expr: |||
  (sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) -
   sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) > 0
   and
   count(kube_node_info{%(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) == 1)
  or
  (sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) -
   (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) -
    max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0
   and
   (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) -
    max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0)
||| % $._config,
|
41 | 49 | annotations+: {
|
42 | 50 | description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' % $._config,
|
43 | 51 | },
|
44 | 52 | } else {
|
// CPU overcommit for a single (unlabelled) cluster.
// Branch 1 (single-node cluster): requests exceed the total allocatable CPU.
//   The overcommit amount is the LEFT operand of `and`, so it is what the alert
//   exposes as $value; the node-count check only acts as a filter.
// Branch 2 (multi-node cluster): requests exceed what remains after removing
//   the largest node, guarded so it only applies when something remains.
expr: |||
  (sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) -
   sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) > 0
   and
   count(kube_node_info{%(kubeStateMetricsSelector)s}) == 1)
  or
  (sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) -
   (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) -
    max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0
   and
   (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) -
    max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0)
||| % $._config,
|
50 | 66 | annotations+: {
|
51 | 67 | description: 'Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' % $._config,
|
|
63 | 79 | } +
|
64 | 80 | if $._config.showMultiCluster then {
|
// Memory overcommit, evaluated per cluster (grouped by the configured cluster label).
// Branch 1 (single-node clusters): requests exceed the total allocatable memory.
//   The overcommit amount is the LEFT operand of `and`, so it is what the alert
//   exposes as $value; the node count is grouped by the cluster label so that
//   both operands of `and` carry the same label set and can match.
// Branch 2 (multi-node clusters): requests exceed what remains after removing
//   the largest node, guarded so it only applies when something remains.
expr: |||
  (sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) -
   sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) > 0
   and
   count(kube_node_info{%(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) == 1)
  or
  (sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) -
   (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) -
    max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0
   and
   (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) -
    max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0)
||| % $._config,
|
70 | 94 | annotations+: {
|
71 | 95 | description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.' % $._config,
|
72 | 96 | },
|
73 |
| - } else |
74 |
| - { |
75 |
| - expr: ||| |
76 |
| - sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 |
77 |
| - and |
78 |
| - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 |
79 |
| - ||| % $._config, |
80 |
| - annotations+: { |
81 |
| - description: 'Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.', |
82 |
| - }, |
| 97 | + } else { |
// Memory overcommit for a single (unlabelled) cluster.
// Branch 1 (single-node cluster): requests exceed the total allocatable memory.
//   The overcommit amount is the LEFT operand of `and`, so it is what the alert
//   exposes as $value; the node-count check only acts as a filter.
// Branch 2 (multi-node cluster): requests exceed what remains after removing
//   the largest node, guarded so it only applies when something remains.
expr: |||
  (sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) -
   sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) > 0
   and
   count(kube_node_info{%(kubeStateMetricsSelector)s}) == 1)
  or
  (sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) -
   (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) -
    max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0
   and
   (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) -
    max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0)
||| % $._config,
| 111 | + annotations+: { |
| 112 | + description: 'Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.', |
83 | 113 | },
|
| 114 | + }, |
84 | 115 | {
|
85 | 116 | alert: 'KubeCPUQuotaOvercommit',
|
86 | 117 | labels: {
|
|
0 commit comments