libnvme: add command execution time detector
To improve OOB NVMe-MI debuggability, this patch will check command
execution time, if it exceeds WARNING_THRESHOLD (3 seconds), print
a warning message about the command information.
Tested:
With this patch, we can detect some suspiciously NVMe-MI commands
which may take long time :
https://paste.googleplex.com/4698907106869248
Google-Bug-Id: 300524907#comment20
Change-Id: Id3d7ff62cea1aef40131b8b05eb014d72152ceb8
Signed-off-by: Jinliang Wang <jinliangw@google.com>
(cherry picked from commit 9363819dc9dbcc4c70c381d76229b02b72dbd259)
diff --git a/recipes-connectivity/libnvme/libnvme/0001-mi-don-t-return-from-mi_mctp_submit-with-a-tag-held.patch b/recipes-connectivity/libnvme/libnvme/0006-mi-don-t-return-from-mi_mctp_submit-with-a-tag-held.patch
similarity index 100%
rename from recipes-connectivity/libnvme/libnvme/0001-mi-don-t-return-from-mi_mctp_submit-with-a-tag-held.patch
rename to recipes-connectivity/libnvme/libnvme/0006-mi-don-t-return-from-mi_mctp_submit-with-a-tag-held.patch
diff --git a/recipes-connectivity/libnvme/libnvme/0007-mi-check-and-print-OOB-NVMe-MI-command-execution-tim.patch b/recipes-connectivity/libnvme/libnvme/0007-mi-check-and-print-OOB-NVMe-MI-command-execution-tim.patch
new file mode 100644
index 0000000..ecec32e
--- /dev/null
+++ b/recipes-connectivity/libnvme/libnvme/0007-mi-check-and-print-OOB-NVMe-MI-command-execution-tim.patch
@@ -0,0 +1,106 @@
+From e825dd67cd8bb02a1da8faefd244b2a50dc62dbc Mon Sep 17 00:00:00 2001
+From: Jinliang Wang <jinliangw@google.com>
+Date: Thu, 26 Oct 2023 15:49:34 -0700
+Subject: [PATCH] mi: check and print OOB NVMe-MI command execution time
+
+Check command execution time, if it exceeds WARNING_THRESHOLD
+(3 seconds), print a warning message about the command.
+
+Patch Tracking Bug: b/311315898
+Upstream info / review: N/A
+Upstream-Status: Pending
+Justification: Improve NVMe-MI debuggability
+
+Signed-off-by: Jinliang Wang <jinliangw@google.com>
+---
+ src/nvme/mi.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 61 insertions(+)
+
+diff --git a/src/nvme/mi.c b/src/nvme/mi.c
+index a41f786..4c5e5eb 100644
+--- a/src/nvme/mi.c
++++ b/src/nvme/mi.c
+@@ -378,10 +378,63 @@ static int nvme_mi_verify_resp_mic(struct nvme_mi_resp *resp)
+ return resp->mic != ~crc;
+ }
+
++#define GET_MI_MSG_TYPE(x) ((x >> 3) & 0xF)
++#define WARNING_THRESHOLD 3 /* seconds */
++static void check_execution_time(nvme_mi_ep_t ep,
++ const struct nvme_mi_req *req,
++ const struct timespec *submit_time)
++{
++ struct timespec complete_time = {0, 0};
++ struct timespec deta_time = {0, 0};
++ struct nvme_mi_admin_req_hdr *admin_req_hdr = NULL;
++ struct nvme_mi_mi_req_hdr *mi_req_hdr = NULL;
++
++ clock_gettime(CLOCK_MONOTONIC, &complete_time);
++
++ timespec_sub(&complete_time, submit_time, &deta_time);
++ if (deta_time.tv_sec >= WARNING_THRESHOLD)
++ {
++ if (req->hdr->type == NVME_MI_MSGTYPE_NVME &&
++ GET_MI_MSG_TYPE(req->hdr->nmp) == NVME_MI_MT_ADMIN)
++ {
++ admin_req_hdr = (struct nvme_mi_admin_req_hdr *)req->hdr;
++ nvme_msg(ep->root, LOG_WARNING,
++ "%s, Admin op:0x%02x, cdw(1:0x%x, 10:0x%x, 11:0x%x,"
++ " 12:0x%x 13:0x%x, 14:0x%x, 15:0x%x) "
++ "T:%dms, elapsed:%dms\n",
++ nvme_mi_endpoint_desc(ep),
++ admin_req_hdr->opcode,
++ admin_req_hdr->cdw1,
++ admin_req_hdr->cdw10,
++ admin_req_hdr->cdw11,
++ admin_req_hdr->cdw12,
++ admin_req_hdr->cdw13,
++ admin_req_hdr->cdw14,
++ admin_req_hdr->cdw15,
++ ep->timeout,
++ (int)(deta_time.tv_sec * 1000 + deta_time.tv_nsec / 1000000));
++ }
++ else if (req->hdr->type == NVME_MI_MSGTYPE_NVME &&
++ GET_MI_MSG_TYPE(req->hdr->nmp) == NVME_MI_MT_MI)
++ {
++ mi_req_hdr = (struct nvme_mi_mi_req_hdr *)req->hdr;
++ nvme_msg(ep->root, LOG_WARNING,
++ "%s, MI op:0x%02x, cdw0:0x%x, cdw1:0x%x T:%dms, elapsed:%dms\n",
++ nvme_mi_endpoint_desc(ep),
++ mi_req_hdr->opcode,
++ mi_req_hdr->cdw0,
++ mi_req_hdr->cdw1,
++ ep->timeout,
++ (int)(deta_time.tv_sec * 1000 + deta_time.tv_nsec / 1000000));
++ }
++ }
++}
++
+ int nvme_mi_submit(nvme_mi_ep_t ep, struct nvme_mi_req *req,
+ struct nvme_mi_resp *resp)
+ {
+ int rc;
++ struct timespec submit_time = {0, 0};
+
+ if (req->hdr_len < sizeof(struct nvme_mi_msg_hdr)) {
+ errno = EINVAL;
+@@ -419,8 +472,16 @@ int nvme_mi_submit(nvme_mi_ep_t ep, struct nvme_mi_req *req,
+ if (nvme_mi_ep_has_quirk(ep, NVME_QUIRK_MIN_INTER_COMMAND_TIME))
+ nvme_mi_insert_delay(ep);
+
++ clock_gettime(CLOCK_MONOTONIC, &submit_time);
++
+ rc = ep->transport->submit(ep, req, resp);
+
++ if ((rc == 0) || (errno == ETIMEDOUT))
++ {
++ /* We are interested on commands which succeed or timed out*/
++ check_execution_time(ep, req, &submit_time);
++ }
++
+ if (nvme_mi_ep_has_quirk(ep, NVME_QUIRK_MIN_INTER_COMMAND_TIME))
+ nvme_mi_record_resp_time(ep);
+
+--
+2.43.0.rc0.421.g78406f8d94-goog
+
diff --git a/recipes-connectivity/libnvme/libnvme_git.bb b/recipes-connectivity/libnvme/libnvme_git.bb
index dd4d257..f1c4992 100644
--- a/recipes-connectivity/libnvme/libnvme_git.bb
+++ b/recipes-connectivity/libnvme/libnvme_git.bb
@@ -18,7 +18,8 @@
file://0003-mi-fix-the-rc-for-nvme_mi_scan_ep.patch \
file://0004-mi-avoid-changing-default-ep-timeout-during-admin_pa.patch \
file://0005-libnvme-mi-add-timeout-functions-to-.map.patch \
- file://0001-mi-don-t-return-from-mi_mctp_submit-with-a-tag-held.patch \
+ file://0006-mi-don-t-return-from-mi_mctp_submit-with-a-tag-held.patch \
+ file://0007-mi-check-and-print-OOB-NVMe-MI-command-execution-tim.patch \
"
S = "${WORKDIR}/git"