ydb-platform
diff --git a/‎ydb/docs/en/core/reference/ydb-sdk/_assets/hc_types_hierarchy.png
21.2 KB b/‎ydb/docs/en/core/reference/ydb-sdk/_assets/hc_types_hierarchy.png
21.2 KB
diff --git a/‎ydb/docs/en/core/reference/ydb-sdk/health-check-api.md
+215-14 b/‎ydb/docs/en/core/reference/ydb-sdk/health-check-api.md
+215-14
diff --git a/‎ydb/docs/ru/core/reference/ydb-sdk/_assets/hc_types_hierarchy.png
21.2 KB b/‎ydb/docs/ru/core/reference/ydb-sdk/_assets/hc_types_hierarchy.png
21.2 KB
@@ -19,13 +19,6 @@ message SelfCheckResult {
 }
 ```
 
-The shortest `HealthCheck` response looks like this. It is returned if there is nothing wrong with the database
-```protobuf
-SelfCheckResult {
-    self_check_result: GOOD
-}
-```
-
 If any issues are detected, the `issue_log` field will contain descriptions of the problems with the following structure:
 ```protobuf
 message IssueLog {
@@ -84,17 +77,17 @@ struct TSelfCheckSettings : public TOperationRequestSettings<TSelfCheckSettings>
 | **DATABASE** ||
 | `Database has multiple issues`</br>`Database has compute issues`</br>`Database has storage issues` | These issues depend solely on the underlying `COMPUTE` and `STORAGE` layers. This is the most general status of the database. |
 | **STORAGE** ||
-| `There are no storage pools` | Unable to determine `STORAGE_POOLS` issues below. |
+| `There are no storage pools` | Storage pools aren't configured. |
 | `Storage degraded`</br>`Storage has no redundancy`</br>`Storage failed` | These issues depend solely on the underlying `STORAGE_POOLS` layer. |
 | `System tablet BSC didn't provide information` | Storage diagnostics will be generated in an alternative way. |
 | `Storage usage over 75%/85%/90%` | Need to increase disk space. |
 | **STORAGE_POOL** ||
 | `Pool degraded/has no redundancy/failed` | These issues depend solely on the underlying `STORAGE_GROUP` layer. |
 | **STORAGE_GROUP** ||
-| `Group has no vslots` ||
+| `Group has no vslots` | This case is not expected; it is an internal problem. |
 | `Group degraded` | The number of disks allowed in the group is not available. |
-| `Group has no redundancy` | A storage group lost its redundancy. |
-| `Group failed` | A storage group lost its integrity. |
+| `Group has no redundancy` | A storage group lost its redundancy. Another VDisk failure may lead to the loss of the group. |
+| `Group failed` | A storage group lost its integrity. Data is not available. |
 ||`HealthCheck` checks various parameters (fault tolerance mode, number of failed disks, disk status, etc.) and, depending on this, sets the appropriate status and displays a message. |
 | **VDISK** ||
 | `System tablet BSC didn't provide known status` | This case is not expected; it is an internal problem. |
@@ -129,6 +122,214 @@ struct TSelfCheckSettings : public TOperationRequestSettings<TSelfCheckSettings>
 | **COMPUTE_POOL** ||
 | `Pool usage is over than 90/95/99%` | One of the pools' CPUs is overloaded. |
 | **NODE_UPTIME** ||
-| `Node is restarting too often/The number of node restarts has increased` | The number of node restarts has exceeded the threshold. |
-| **NODES_SYNC** ||
-| `The nodes have a time difference of ... ms` | Time drift on nodes might lead to potential issues with coordinating distributed transactions. |
+| `The number of node restarts has increased` | The number of node restarts has exceeded the threshold. By default, 10 restarts per hour. |
+| `Node is restarting too often` | The number of node restarts has exceeded the threshold. By default, 30 restarts per hour. |
+| **NODES_TIME_DIFFERENCE** ||
+| `The nodes have a time difference of ... ms` | Time drift on nodes might lead to potential issues with coordinating distributed transactions. This message starts to appear when the time difference reaches 5 ms. |
+
+
+## Example {#examples}
+The shortest `HealthCheck` response looks like this. It is returned if there is nothing wrong with the database:
+```json
+{
+  "self_check_result": "GOOD"
+}
+```
+
+Response with `EMERGENCY` status:
+```json
+{
+  "self_check_result": "EMERGENCY",
+  "issue_log": [
+    {
+      "id": "RED-27c3-70fb",
+      "status": "RED",
+      "message": "Database has multiple issues",
+      "location": {
+        "database": {
+          "name": "/slice"
+        }
+      },
+      "reason": [
+        "RED-27c3-4e47",
+        "RED-27c3-53b5",
+        "YELLOW-27c3-5321"
+      ],
+      "type": "DATABASE",
+      "level": 1
+    },
+    {
+      "id": "RED-27c3-4e47",
+      "status": "RED",
+      "message": "Compute has issues with system tablets",
+      "location": {
+        "database": {
+          "name": "/slice"
+        }
+      },
+      "reason": [
+        "RED-27c3-c138-BSController"
+      ],
+      "type": "COMPUTE",
+      "level": 2
+    },
+    {
+      "id": "RED-27c3-c138-BSController",
+      "status": "RED",
+      "message": "System tablet is unresponsive",
+      "location": {
+        "compute": {
+          "tablet": {
+            "type": "BSController",
+            "id": [
+              "72057594037989391"
+            ]
+          }
+        },
+        "database": {
+          "name": "/slice"
+        }
+      },
+      "type": "SYSTEM_TABLET",
+      "level": 3
+    },
+    {
+      "id": "RED-27c3-53b5",
+      "status": "RED",
+      "message": "System tablet BSC didn't provide information",
+      "location": {
+        "database": {
+          "name": "/slice"
+        }
+      },
+      "type": "STORAGE",
+      "level": 2
+    },
+    {
+      "id": "YELLOW-27c3-5321",
+      "status": "YELLOW",
+      "message": "Storage degraded",
+      "location": {
+        "database": {
+          "name": "/slice"
+        }
+      },
+      "reason": [
+        "YELLOW-27c3-595f-8d1d"
+      ],
+      "type": "STORAGE",
+      "level": 2
+    },
+    {
+      "id": "YELLOW-27c3-595f-8d1d",
+      "status": "YELLOW",
+      "message": "Pool degraded",
+      "location": {
+        "storage": {
+          "pool": {
+            "name": "static"
+          }
+        },
+        "database": {
+          "name": "/slice"
+        }
+      },
+      "reason": [
+        "YELLOW-27c3-ef3e-0"
+      ],
+      "type": "STORAGE_POOL",
+      "level": 3
+    },
+    {
+      "id": "RED-84d8-3-3-1",
+      "status": "RED",
+      "message": "PDisk is not available",
+      "location": {
+        "storage": {
+          "node": {
+            "id": 3,
+            "host": "man0-0026.ydb-dev.nemax.nebiuscloud.net",
+            "port": 19001
+          },
+          "pool": {
+            "group": {
+              "vdisk": {
+                "pdisk": [
+                  {
+                    "id": "3-1",
+                    "path": "/dev/disk/by-partlabel/NVMEKIKIMR01"
+                  }
+                ]
+              }
+            }
+          }
+        }
+      },
+      "type": "PDISK",
+      "level": 6
+    },
+    {
+      "id": "RED-27c3-4847-3-0-1-0-2-0",
+      "status": "RED",
+      "message": "VDisk is not available",
+      "location": {
+        "storage": {
+          "node": {
+            "id": 3,
+            "host": "man0-0026.ydb-dev.nemax.nebiuscloud.net",
+            "port": 19001
+          },
+          "pool": {
+            "name": "static",
+            "group": {
+              "vdisk": {
+                "id": [
+                  "0-1-0-2-0"
+                ]
+              }
+            }
+          }
+        },
+        "database": {
+          "name": "/slice"
+        }
+      },
+      "reason": [
+        "RED-84d8-3-3-1"
+      ],
+      "type": "VDISK",
+      "level": 5
+    },
+    {
+      "id": "YELLOW-27c3-ef3e-0",
+      "status": "YELLOW",
+      "message": "Group degraded",
+      "location": {
+        "storage": {
+          "pool": {
+            "name": "static",
+            "group": {
+              "id": [
+                "0"
+              ]
+            }
+          }
+        },
+        "database": {
+          "name": "/slice"
+        }
+      },
+      "reason": [
+        "RED-27c3-4847-3-0-1-0-2-0"
+      ],
+      "type": "STORAGE_GROUP",
+      "level": 4
+    }
+  ],
+  "location": {
+    "id": 5,
+    "host": "man0-0028.ydb-dev.nemax.nebiuscloud.net",
+    "port": 19001
+  }
+}
+```