Container Resource Tracking
# container requests, limit , table
# options: format: table, type: Instant
# variables:
# namespace: label_values(kube_namespace_labels{namespace=~"선택|선택"}, namespace)
# pod: label_values(kube_pod_info{namespace=~"$namespace"},pod)
# container: label_values(kube_pod_container_info{namespace=~"$namespace", pod=~"$pod"},container)
sum(kube_pod_container_resource_requests{resource="cpu", namespace="$namespace", pod=~"$pod", container=~"$container"}) by (namespace, pod ,container)
sum(kube_pod_container_resource_limits{resource="cpu", namespace="$namespace", pod=~"$pod", container=~"$container"}) by (namespace, pod ,container)
sum(kube_pod_container_resource_requests{resource="memory", namespace="$namespace", pod=~"$pod", container=~"$container"}) by (namespace, pod ,container)
sum(kube_pod_container_resource_limits{resource="memory", namespace="$namespace", pod=~"$pod", container=~"$container"}) by (namespace, pod ,container)
# container cpu usage , graph
sum(rate(container_cpu_usage_seconds_total{image!="", namespace="$namespace", pod=~"$pod",container=~"$container"}[$__rate_interval])) by (pod, container)
# container mem usage , graph
sum(container_memory_working_set_bytes{image!="", namespace=~"$namespace", pod=~"$pod", container=~"$container"}) by (pod, container)
Cluster Resource 현황
# Node
count(up{job="node-exporter"})
# namespace
count(kube_namespace_labels)
# pod
count(kube_pod_info)
# pvc
count(kube_persistentvolumeclaim_info)
# cpu
sum(node:node_num_cpu:sum)
# memory
sum(node_memory_MemTotal_bytes) / 1024 / 1024 / 1024
# resourcequota cpu
sum(kube_resourcequota{resource="requests.cpu", type="hard", namespace=~"선택|선택"})
# resourcequota mem
sum(kube_resourcequota{resource="requests.memory", type="hard", namespace=~"선택|선택"})
# cpu 여유량
sum(node:node_num_cpu:sum{node!~"masternode"}) - sum(kube_resourcequota{resource="requests.cpu", type="hard", namespace=~"선택|선택"})
# mem 여유량
(sum(node_memory_MemTotal_bytes{instance!~"masternode"}) - sum(kube_resourcequota{resource="requests.memory", type="hard", namespace=~"선택|선택"})) / 1024 / 1024 / 1024
# cpu 할당률
sum(kube_resourcequota{resource="requests.cpu", type="hard", namespace=~"선택|선택"}) / sum(node:node_num_cpu:sum{node!~"masternode"})
Cluster Resource Performance Report (사용량)
## option: Format: Table, Type: Instant
## record_rule
# cluster cpu 사용량: cpu_usage:totla_by_node:rate2m
-> count(max without(mode) (node_cpu_seconds_total)) - sum(rate(node_cpu_seconds_total{mode="idle"}[2m]))
# cluster memory 사용량: memory_usage:totla_by_node
-> sum(node_memory_Active_bytes) # calico 공식 홈페이지 참조
# cluster pod cpu 사용량: cpu_usage:total_by_pod
-> sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate)
# cluster pod mem 사용량: memory_usage:total_by_pod
-> sum(container_memory_working_set_bytes{container!="", pod!=""})
## Dashboard metrics
# cluster cpu 사용량 (avg)
avg_over_time(cpu_usage:totla_by_node:rate2m[$__range])
# cluster cpu 사용량 (max)
max_over_time(cpu_usage:totla_by_node:rate2m[$__range])
# memory 사용량 (avg)
avg_over_time(memory_usage:totla_by_node[$__range]) / 1024 / 1024 / 1024
# memory 사용량 (max)
max_over_time(memory_usage:totla_by_node[$__range]) / 1024 / 1024 / 1024
# pod cpu 사용량 (avg)
avg_over_time(cpu_usage:total_by_pod[$__range])
# pod cpu 사용량 (max)
max_over_time(cpu_usage:total_by_pod[$__range])
# pod mem 사용량 (avg)
avg_over_time(memory_usage:total_by_pod[$__range])
# pod mem 사용량 (max)
max_over_time(memory_usage:total_by_pod[$__range])
Cluster Resource Performance Report (사용률)
## option: Format: Table, Type: Instant
## record_rule
# cluster cpu 사용률: cpu_usage_rate:total_by_node:rate2m
-> 1 - avg(rate(node_cpu_seconds_total{mode="idle"}[2m]))
# cluster memory 사용률: memory_usage_rate:totla_by_node
-> sum(node_memory_Active_bytes) / sum(node_memory_MemTotal_bytes)
# cluster pod cpu 사용률: cpu_usage_rate:total_by_pod
-> sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) / sum(node:node_num_cpu:sum)
# prometheus에서 기본 제공하는 record rule
# node:node_num_cpu:sum
# node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
# cluster pod mem 사용률: memory_usage_rate:total_by_pod
-> sum(container_memory_working_set_bytes{container!="",pod!=""}) / sum(node_memory_MemTotal_bytes)
## Dashboard metrics
# cluster cpu 사용률 (avg)
avg_over_time(cpu_usage_rate:total_by_node:rate2m[$__range])
# cluster cpu 사용률 (max)
max_over_time(cpu_usage_rate:total_by_node:rate2m[$__range])
# memory 사용률 (avg)
avg_over_time(memory_usage_rate:totla_by_node[$__range])
# memory 사용률 (max)
max_over_time(memory_usage_rate:totla_by_node[$__range])
# pod cpu 사용률 (avg)
avg_over_time(cpu_usage_rate:total_by_pod[$__range])
# pod cpu 사용률 (max)
max_over_time(cpu_usage_rate:total_by_pod[$__range])
# pod mem 사용률 (avg)
avg_over_time(memory_usage_rate:total_by_pod[$__range])
# pod mem 사용률 (max)
max_over_time(memory_usage_rate:total_by_pod[$__range])
Namespace Resource Performance Report
## option Format: Table, Type: Instant
## Transform: Outer join - namespace, Organize fields
## tip
## transform 에서 filter data by values 로 값들을 필터 걸어서 안보이게 할 수 있음.
## record rule
cpu_usage_rate:per_namespace:rate2m
-> sum by(namespace) (rate(container_cpu_usage_seconds_total{image=""}[2m])) / sum by (namespace) (kube_resourcequota{resource="requests.cpu",type="hard"})
memory_usage_rate:per_namespace:rate2m
-> sum by (namespace) (container_memory_working_set_bytes{image=""}) / sum by (namespace) (kube_resourcequota{resource="requests.memory",type="hard"})
cpu_usage:per_namespace:rate2m
-> sum by (namespace) (rate(container_cpu_usage_seconds_total{image=""}[2m]))
memory_usage:per_namespace
-> sum by (namespace) (container_memory_working_set_bytes{image=""})
## Dashboard
# Namespace Name & Create
floor((time() - kube_namespace_created{namespace=~"선택|선택"}) / 86400)
# Namespace resource quota hard
sum by (namespace) (kube_resourcequota {resource="requests.cpu", type="hard", namespace=~"선택|선택"})
sum by (namespace) (kube_resourcequota {resource="requests.memory", type="hard", namespace=~"선택|선택"})
# Namespace resource quota use
sum by (namespace) (kube_resourcequota {resource="requests.cpu", type="used", namespace=~"선택|선택"})
sum by (namespace) (kube_resourcequota {resource="requests.memory", type="used", namespace=~"선택|선택"})
# namespace resource 사용률
avg_over_time(cpu_usage_rate:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 100
max_over_time(cpu_usage_rate:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 100
avg_over_time(memory_usage_rate:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 100
max_over_time(memory_usage_rate:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 100
# namespace resource 사용률 예상 (30%)
avg_over_time(cpu_usage_rate:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 1.3 *100
max_over_time(cpu_usage_rate:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 1.3 *100
avg_over_time(memory_usage_rate:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 1.3 *100
max_over_time(memory_usage_rate:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 1.3 *100
# namespace resources 사용량
avg_over_time(cpu_usage:per_namespace:rate2m{namespace=~"선택|선택"}[$__range])
max_over_time(cpu_usage:per_namespace:rate2m{namespace=~"선택|선택"}[$__range])
avg_over_time(memory_usage:per_namespace{namespace=~"선택|선택"}[$__range]) / 1024 / 1024 / 1024
max_over_time(memory_usage:per_namespace{namespace=~"선택|선택"}[$__range]) / 1024 / 1024 / 1024
# namespace resources 사용량 예상 (30%)
avg_over_time(cpu_usage:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 1.3
max_over_time(cpu_usage:per_namespace:rate2m{namespace=~"선택|선택"}[$__range]) * 1.3
avg_over_time(memory_usage:per_namespace{namespace=~"선택|선택"}[$__range]) * 1.3 / 1024 / 1024 / 1024
max_over_time(memory_usage:per_namespace{namespace=~"선택|선택"}[$__range]) * 1.3 / 1024 / 1024 / 1024