network
#
util
sum(rate(node_network_receive_bytes[5m])) by (node) + sum(rate(node_network_transmit_bytes[5m])) by (node)
satur
sum(rate(node_network_receive_drop[5m])) by (node) + sum(rate(node_network_transmit_drop[5m])) by (node)
errors
node_network_receive_errs and node_network_transmit_errs
disk
#
satur and util (size)
sum(node_filesystem_free{mountpoint="/"}) by (node, mountpoint) / sum(node_filesystem_size{mountpoint="/"}) by (node, mountpoint)
satur and util (throughput)
node_disk_io_now (I/Os in progress), I/O time node_disk_io_time_ms, and weighted I/O time node_disk_io_time_weighted
memory
#
utilization
1 - sum(node_memory_MemAvailable) by (node)
/ sum(node_memory_MemTotal) by (node)
errors
node_edac_correctable_errors_total
node_edac_uncorrectable_errors_total
node_edac_csrow_correctable_errors_total
node_edac_csrow_uncorrectable_errors_total
cpu
#
utilization
sum(rate(
node_cpu{mode!="idle",
mode!="iowait",
mode!~"^(?:guest.*)$"
}[5m])) BY (instance)
saturation
sum(node_load1) by (node) / count(node_cpu{mode="system"}) by (node) * 100
apiserver
#
rate
sum(rate(apiserver_request_count[5m])) by (resource, subresource, verb)
errors
rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
duration
histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket[5m]))
by (le, resource, subresource, verb) ) / 1e+06
container
#
cpu
util
sum(
rate(container_cpu_usage_seconds_total[5m]))
by (container_name)
satur
sum(
rate(container_cpu_cfs_throttled_seconds_total[5m]))
by (container_name)
memory
util
sum(container_memory_working_set_bytes{name!~"POD"})
by (name)
satur
sum(container_memory_working_set_bytes) by (container_name) / sum(label_join(kube_pod_container_resource_limits_memory_bytes,
"container_name", "", "container")) by (container_name)
disk
util
sum(rate(container_fs_writes_bytes_total[5m])) by (container_name,device)
sum(rate(container_fs_reads_bytes_total[5m])) by (container_name,device)
network
util
sum(rate(container_network_receive_bytes_total[5m])) by (container_name,interface)
sum(rate(container_network_transmit_bytes_total[5m])) by (container_name,interface)
satur
container_network_receive_packets_dropped_total
container_network_transmit_packets_dropped_total
container_network_receive_errors_total
container_network_transmit_errors_total