@@ -764,199 +764,7 @@ spec:
764
764
quantile: "0.5"
765
765
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
766
766
- name : node-exporter
767
- rules :
768
- - alert : NodeFilesystemAlmostOutOfSpace
769
- annotations :
770
- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
771
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfSpace.md
772
- summary : Filesystem has less than 5% space left.
773
- expr : |
774
- (
775
- node_filesystem_avail_bytes{job="node-exporter",fstype!="shiftfs"} / node_filesystem_size_bytes{job="node-exporter",fstype!="shiftfs"} * 100 < 5
776
- and
777
- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
778
- )
779
- for : 15m
780
- labels :
781
- severity : critical
782
- - alert : NodeFilesystemAlmostOutOfSpace
783
- annotations :
784
- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
785
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfSpace.md
786
- summary : Filesystem has less than 3% space left.
787
- expr : |
788
- (
789
- node_filesystem_avail_bytes{job="node-exporter",fstype!="shiftfs"} / node_filesystem_size_bytes{job="node-exporter",fstype!="shiftfs"} * 100 < 3
790
- and
791
- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
792
- )
793
- for : 15m
794
- labels :
795
- severity : critical
796
- - alert : NodeFilesystemFilesFillingUp
797
- annotations :
798
- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
799
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemFilesFillingUp.md
800
- summary : Filesystem is predicted to run out of inodes within the next 24 hours.
801
- expr : |
802
- (
803
- node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"} / node_filesystem_files{job="node-exporter",fstype!="shiftfs"} * 100 < 40
804
- and
805
- predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"}[6h], 24*60*60) < 0
806
- and
807
- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
808
- )
809
- for : 1h
810
- labels :
811
- severity : warning
812
- - alert : NodeFilesystemFilesFillingUp
813
- annotations :
814
- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
815
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemFilesFillingUp.md
816
- summary : Filesystem is predicted to run out of inodes within the next 4 hours.
817
- expr : |
818
- (
819
- node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"} / node_filesystem_files{job="node-exporter",fstype!="shiftfs"} * 100 < 20
820
- and
821
- predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"}[6h], 4*60*60) < 0
822
- and
823
- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
824
- )
825
- for : 1h
826
- labels :
827
- severity : critical
828
- - alert : NodeFilesystemAlmostOutOfFiles
829
- annotations :
830
- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
831
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfFiles.md
832
- summary : Filesystem has less than 5% inodes left.
833
- expr : |
834
- (
835
- node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"} / node_filesystem_files{job="node-exporter",fstype!="shiftfs"} * 100 < 5
836
- and
837
- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
838
- )
839
- for : 1h
840
- labels :
841
- severity : warning
842
- - alert : NodeFilesystemAlmostOutOfFiles
843
- annotations :
844
- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
845
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfFiles.md
846
- summary : Filesystem has less than 3% inodes left.
847
- expr : |
848
- (
849
- node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"} / node_filesystem_files{job="node-exporter",fstype!="shiftfs"} * 100 < 3
850
- and
851
- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
852
- )
853
- for : 1h
854
- labels :
855
- severity : critical
856
- - alert : NodeNetworkReceiveErrs
857
- annotations :
858
- description : ' {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
859
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeNetworkReceiveErrs.md
860
- summary : Network interface is reporting many receive errors.
861
- expr : |
862
- rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
863
- for : 1h
864
- labels :
865
- severity : warning
866
- - alert : NodeNetworkTransmitErrs
867
- annotations :
868
- description : ' {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
869
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeNetworkTransmitErrs.md
870
- summary : Network interface is reporting many transmit errors.
871
- expr : |
872
- rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
873
- for : 1h
874
- labels :
875
- severity : warning
876
- - alert : NodeTextFileCollectorScrapeError
877
- annotations :
878
- description : Node Exporter text file collector failed to scrape.
879
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeTextFileCollectorScrapeError.md
880
- summary : Node Exporter text file collector failed to scrape.
881
- expr : |
882
- node_textfile_scrape_error{job="node-exporter"} == 1
883
- labels :
884
- severity : warning
885
- - alert : NodeClockSkewDetected
886
- annotations :
887
- description : Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
888
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeClockSkewDetected.md
889
- summary : Clock skew detected.
890
- expr : |
891
- (
892
- node_timex_offset_seconds{job="node-exporter"} > 0.05
893
- and
894
- deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0
895
- )
896
- or
897
- (
898
- node_timex_offset_seconds{job="node-exporter"} < -0.05
899
- and
900
- deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
901
- )
902
- for : 10m
903
- labels :
904
- severity : warning
905
- - alert : NodeClockNotSynchronising
906
- annotations :
907
- description : Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
908
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeClockNotSynchronising.md
909
- summary : Clock not synchronising.
910
- expr : |
911
- min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
912
- and
913
- node_timex_maxerror_seconds{job="node-exporter"} >= 16
914
- for : 10m
915
- labels :
916
- severity : warning
917
- - alert : NodeRAIDDegraded
918
- annotations :
919
- description : RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
920
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeRAIDDegraded.md
921
- summary : RAID Array is degraded
922
- expr : |
923
- node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"}) > 0
924
- for : 15m
925
- labels :
926
- severity : critical
927
- - alert : NodeRAIDDiskFailure
928
- annotations :
929
- description : At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
930
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeRAIDDiskFailure.md
931
- summary : Failed device in RAID array
932
- expr : |
933
- node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"} > 0
934
- labels :
935
- severity : warning
936
- - alert : NodeFileDescriptorLimit
937
- annotations :
938
- description : File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
939
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFileDescriptorLimit.md
940
- summary : Kernel is predicted to exhaust file descriptors limit soon.
941
- expr : |
942
- (
943
- node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
944
- )
945
- for : 15m
946
- labels :
947
- severity : warning
948
- - alert : NodeFileDescriptorLimit
949
- annotations :
950
- description : File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
951
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFileDescriptorLimit.md
952
- summary : Kernel is predicted to exhaust file descriptors limit soon.
953
- expr : |
954
- (
955
- node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
956
- )
957
- for : 15m
958
- labels :
959
- severity : critical
767
+ rules : []
960
768
- name : node-exporter.rules
961
769
rules :
962
770
- expr : |
0 commit comments