dbus-sensors: nvmesensor: refactor createVolume and createNamespace
Refactor createVolume and createNamespace implementation to avoid
sd-bus assertion.
Tested:
b/309864477#comment8
Google-Bug-Id: 309864477
Google-Bug-Id: 308007717
Change-Id: I9a7b8f74801f030b0b3785e74ab1a625a70e3fa3
Signed-off-by: Jinliang Wang <jinliangw@google.com>
(cherry picked from commit 33715b21f07b436f5d0965dc5585a0d66e6c3b64)
diff --git a/recipes-phosphor/sensors/dbus-sensors/0022-nvmesensor-refactor-createVolume-and-createNamespace.patch b/recipes-phosphor/sensors/dbus-sensors/0022-nvmesensor-refactor-createVolume-and-createNamespace.patch
new file mode 100644
index 0000000..70b6651
--- /dev/null
+++ b/recipes-phosphor/sensors/dbus-sensors/0022-nvmesensor-refactor-createVolume-and-createNamespace.patch
@@ -0,0 +1,339 @@
+From 62791d833aa047c5d5a2312231b7c850dc46c607 Mon Sep 17 00:00:00 2001
+From: Jinliang Wang <jinliangw@google.com>
+Date: Mon, 13 Nov 2023 23:54:18 -0800
+Subject: [PATCH] nvmesensor: refactor createVolume and createNamespace
+
+nvmesensor will meet below assertion when creating volumes on multiple
+NVMeSubsystem parallelly:
+Assertion '!bus->current_slot' failed at c/debug/systemd/1_253.3-r0/src/libsystemd/sd-bus/sd-bus.c:3212, function bus_process_internal(). Aborting.
+We are not sure about the root cause of the assertion(it seems to
+be related to the finished_cb). But we can workaround the assertion
+by simplifying the current implementation:
+1) We don't need to `getBlockSize` on every NVMeMi::createNamespace
+ request. During start up, `NVMeSubsystem::querySupportedFormats'
+ already read it, so we can save it and re-use it. By this way,
+ we can avoid the 4KB nvme_mi_admin_identify_ns read command.
+2) Since we don't need `getBlockSize`, we can merge the original
+ `submitted_cb` and `finished_cb` into one callback
+3) Even we don't need async NVMeCreateVolumeProgress task for
+ `createVolume`, but the Redfish interface is already used by other
+ client. To keep compatible with existing clients, so we still create
+ a NVMeCreateVolumeProgress, and mark it as finished right away.
+
+Tested:
+With the refactor, the assertion doesn't happen, and passed for 100
+loops of parallel createVolume test.
+
+Change-Id: Ic7984f93ce45cf152bace3dc8aacaf7eaff421bc
+Signed-off-by: Jinliang Wang <jinliangw@google.com>
+---
+ src/NVMeIntf.hpp | 6 ++--
+ src/NVMeMi.cpp | 80 ++++++++++------------------------------------
+ src/NVMeMi.hpp | 5 ++-
+ src/NVMeSubsys.cpp | 79 +++++++++++++++++++++------------------------
+ src/NVMeSubsys.hpp | 7 ++++
+ 5 files changed, 64 insertions(+), 113 deletions(-)
+
+diff --git a/src/NVMeIntf.hpp b/src/NVMeIntf.hpp
+index 23219b2..d5c8023 100644
+--- a/src/NVMeIntf.hpp
++++ b/src/NVMeIntf.hpp
+@@ -212,10 +212,8 @@ class NVMeMiIntf
+
+ virtual void createNamespace(
+ nvme_mi_ctrl_t ctrl, uint64_t size, size_t lba_format,
+- bool metadata_at_end,
+- std::function<void(nvme_ex_ptr ex)>&& submitted_cb,
+- std::function<void(nvme_ex_ptr ex, NVMeNSIdentify newid)>&&
+- finished_cb) = 0;
++ bool metadata_at_end, size_t block_size,
++ std::function<void(nvme_ex_ptr ex, NVMeNSIdentify newid)>&& cb) = 0;
+
+ virtual void adminDeleteNamespace(
+ nvme_mi_ctrl_t ctrl, uint32_t nsid,
+diff --git a/src/NVMeMi.cpp b/src/NVMeMi.cpp
+index 270b5c6..6b1a251 100644
+--- a/src/NVMeMi.cpp
++++ b/src/NVMeMi.cpp
+@@ -1317,41 +1317,16 @@ size_t NVMeMi::getBlockSize(nvme_mi_ctrl_t ctrl, size_t lba_format)
+ return 1 << id.lbaf[lba_format].ds;
+ }
+
+-/*
+- finished_cb will not be called if submitted_cb is called with a failure.
+- */
+ void NVMeMi::createNamespace(
+ nvme_mi_ctrl_t ctrl, uint64_t size, size_t lba_format, bool metadata_at_end,
+- std::function<void(nvme_ex_ptr ex)>&& submitted_cb,
+- std::function<void(nvme_ex_ptr ex, NVMeNSIdentify newid)>&& finished_cb)
++ size_t block_size,
++ std::function<void(nvme_ex_ptr ex, NVMeNSIdentify newid)>&& cb)
+ {
+- printf("createns %d\n", (int)gettid());
+- std::error_code post_err = try_post(
+- [self{shared_from_this()}, ctrl, size, lba_format, metadata_at_end,
+- submitted_cb{std::move(submitted_cb)},
+- finished_cb{std::move(finished_cb)}]() {
+- size_t block_size;
+-
+- try
+- {
+- block_size = self->getBlockSize(ctrl, lba_format);
+- }
+- catch (nvme_ex_ptr e)
+- {
+- submitted_cb(e);
+- return;
+- }
+-
+- if (size % block_size != 0)
+- {
+- auto msg =
+- std::string("Size must be a multiple of the block size ") +
+- std::to_string(block_size);
+- submitted_cb(makeLibNVMeError(
+- msg, std::make_shared<CommonErr::InvalidArgument>()));
+- return;
+- }
+-
++ std::cerr << "eid:" << static_cast<int>(eid) << " NVMeMi::createNamespace"
++ << std::endl;
++ std::error_code post_err =
++ try_post([self{shared_from_this()}, ctrl, size, lba_format,
++ metadata_at_end, block_size, cb{std::move(cb)}]() {
+ uint64_t blocks = size / block_size;
+
+ // TODO: this will become nvme_ns_mgmt_host_sw_specified in a newer
+@@ -1374,18 +1349,16 @@ void NVMeMi::createNamespace(
+ data.ncap = ::htole64(blocks);
+ data.flbas = flbas;
+
+- printf("verified %d\n", (int)gettid());
+-
+- // submission has been verified.
+- submitted_cb(nvme_ex_ptr());
+- printf("after submitted_cb %d\n", (int)gettid());
+-
++ std::cerr << "eid:" << static_cast<int>(self->eid)
++ << " nvme_mi_admin_ns_mgmt_create start" << std::endl;
+ unsigned timeout = nvme_mi_ep_get_timeout(self->nvmeEP);
+ nvme_mi_ep_set_timeout(self->nvmeEP, namespaceDefaultTimeoutMS);
+ int status = nvme_mi_admin_ns_mgmt_create(ctrl, &data, 0, &new_nsid);
+ nvme_mi_ep_set_timeout(self->nvmeEP, timeout);
++ std::cerr << "eid:" << static_cast<int>(self->eid)
++ << " nvme_mi_admin_ns_mgmt_create done" << std::endl;
+
+- nvme_ex_ptr e = makeLibNVMeError(errno, status, "createVolume");
++ nvme_ex_ptr e = makeLibNVMeError(errno, status, "createNamespace");
+
+ NVMeNSIdentify newns = {
+ .namespaceId = new_nsid,
+@@ -1396,35 +1369,16 @@ void NVMeMi::createNamespace(
+ .metadataAtEnd = metadata_at_end,
+ };
+
+- self->io.post([finished_cb{std::move(finished_cb)}, e, newns]() {
+- finished_cb(e, newns);
+- });
+-
+-#if 0
+- // TODO testing purposes
+- static uint32_t counter = 20;
+-
+- printf("createNamespace top, sleeping 5 seconds\n");
+- sleep(5);
+-
+- uint32_t new_ns = counter++;
+-
+- printf("create complete. ns %d\n", new_ns);
+-
+- auto err = std::make_error_code(static_cast<std::errc>(0));
+- cb(err, 0, new_ns);
+-#endif
++ self->io.post([cb{std::move(cb)}, e, newns]() { cb(e, newns); });
+ });
+
+- printf("submitted cb %d\n", (int)gettid());
+-
+ if (post_err)
+ {
+- std::cerr << "adminAttachDetachNamespace post failed: " << post_err
++ std::cerr << "eid:" << static_cast<int>(eid)
++ << "NVMeMi::createNamespace post failed: " << post_err
+ << std::endl;
+- auto e = makeLibNVMeError(post_err, -1, "createVolume");
+- io.post(
+- [submitted_cb{std::move(submitted_cb)}, e]() { submitted_cb(e); });
++ auto e = makeLibNVMeError(post_err, -1, "createNamespace");
++ io.post([cb{std::move(cb)}, e]() { cb(e, NVMeNSIdentify()); });
+ }
+ }
+
+diff --git a/src/NVMeMi.hpp b/src/NVMeMi.hpp
+index 05d0a6e..36f9a2b 100644
+--- a/src/NVMeMi.hpp
++++ b/src/NVMeMi.hpp
+@@ -73,9 +73,8 @@ class NVMeMi : public NVMeMiIntf, public std::enable_shared_from_this<NVMeMi>
+
+ void createNamespace(
+ nvme_mi_ctrl_t ctrl, uint64_t size, size_t lba_format,
+- bool metadata_at_end,
+- std::function<void(nvme_ex_ptr ex)>&& submitted_cb,
+- std::function<void(nvme_ex_ptr ex, NVMeNSIdentify newid)>&& finished_cb)
++ bool metadata_at_end, size_t block_size,
++ std::function<void(nvme_ex_ptr ex, NVMeNSIdentify newid)>&& cb)
+ override;
+
+ void adminDeleteNamespace(
+diff --git a/src/NVMeSubsys.cpp b/src/NVMeSubsys.cpp
+index b3290d5..9aaf508 100644
+--- a/src/NVMeSubsys.cpp
++++ b/src/NVMeSubsys.cpp
+@@ -634,59 +634,53 @@ sdbusplus::message::object_path
+ NVMeSubsystem::createVolume(boost::asio::yield_context yield, uint64_t size,
+ size_t lbaFormat, bool metadataAtEnd)
+ {
+- // #0 (sequence of runtime/callbacks)
+- auto prog_id = getRandomId();
++ // Checks argument
++ if (lbaFormat >= supportedFormats.size())
++ {
++ throw *makeLibNVMeError("Invalid lbaFormat",
++ std::make_shared<CommonErr::InvalidArgument>());
++ }
++
++ size_t block_size = supportedFormats[lbaFormat].blockSize;
++ if (size % block_size != 0)
++ {
++ auto msg = std::string("Size must be a multiple of the block size ") +
++ std::to_string(block_size);
++ throw *makeLibNVMeError(msg,
++ std::make_shared<CommonErr::InvalidArgument>());
++ }
+
+ nvme_mi_ctrl_t ctrl = primaryController->getMiCtrl();
+ auto intf = std::get<std::shared_ptr<NVMeMiIntf>>(nvmeIntf.getInferface());
+
+- using submit_callback_t = void(std::tuple<nvme_ex_ptr>);
+- auto [ex] = boost::asio::async_initiate<boost::asio::yield_context,
+- submit_callback_t>(
+- [weak{weak_from_this()}, prog_id, intf, ctrl, size, lbaFormat,
+- metadataAtEnd](auto&& handler) {
++ using callback_t = void(std::tuple<nvme_ex_ptr, NVMeNSIdentify>);
++ auto [ex, newns] =
++ boost::asio::async_initiate<boost::asio::yield_context, callback_t>(
++ [intf, ctrl, size, lbaFormat, metadataAtEnd,
++ block_size](auto&& handler) {
+ auto h = asio_helper::CopyableCallback(std::move(handler));
+
+- // #1
+ intf->createNamespace(
+- ctrl, size, lbaFormat, metadataAtEnd,
+-
+- // submitted_cb
+- [h](nvme_ex_ptr ex) mutable {
+- // #2
+-
+- // Async completion of the createNamespace call.
+- // The actual nvme_mi_admin_ns_mgmt_create() call is still running
+- // in a separate thread. Pass the error status back out.
+- h(std::make_tuple(ex));
+- },
+-
+- // finished_cb
+- [weak, prog_id](nvme_ex_ptr ex, NVMeNSIdentify newns) mutable {
+- // #5. This will only be called once #4 completes.
+- // It will not be called if the submit failed.
+- auto self = weak.lock();
+- if (!self)
+- {
+- std::cerr << "createNamespace completed while nvmesensor was "
+- "exiting\n";
+- return;
+- }
+- // The NS create has completed (either successfully or not)
+- self->createVolumeFinished(prog_id, ex, newns);
++ ctrl, size, lbaFormat, metadataAtEnd, block_size,
++ [h](nvme_ex_ptr ex, NVMeNSIdentify newns) mutable {
++ h(std::make_tuple(ex, newns));
+ });
+- },
+- yield);
+-
+- // #3
++ },
++ yield);
+
+ // Exception must be thrown outside of the async block
+ if (ex)
+ {
++ std::cerr << name
++ << ": createVolume failed, error: " << ex->description()
++ << std::endl;
+ throw *ex;
+ }
+
+- // Progress endpoint for clients to poll, if the submit was successful.
++ // Creates a progress which already succeed.
++ std::string prog_id = getRandomId();
++
++ // Progress endpoint for clients to poll
+ std::string prog_path = path + "/CreateProgress/" + prog_id;
+
+ auto prog = std::make_shared<NVMeCreateVolumeProgress>(conn, prog_path);
+@@ -695,9 +689,8 @@ sdbusplus::message::object_path
+ throw std::logic_error("duplicate progress id");
+ }
+
++ createVolumeFinished(prog_id, ex, newns);
+ updateAssociation();
+-
+- // #4
+ return prog_path;
+ }
+
+@@ -1051,7 +1044,7 @@ void NVMeSubsystem::querySupportedFormats()
+ }
+
+ std::cerr << self->name << ": Got nlbaf " << nlbaf << "\n";
+- std::vector<LBAFormat> formats;
++ self->supportedFormats.clear();
+ for (size_t i = 0; i < nlbaf; i++)
+ {
+ size_t blockSize = 1ul << id.lbaf[i].ds;
+@@ -1059,12 +1052,12 @@ void NVMeSubsystem::querySupportedFormats()
+ RelPerf rp = relativePerformanceFromRP(id.lbaf[i].rp);
+ std::cerr << self->name << ": lbaf " << i << " blocksize "
+ << blockSize << "\n";
+- formats.push_back({.index = i,
++ self->supportedFormats.push_back({.index = i,
+ .blockSize = blockSize,
+ .metadataSize = metadataSize,
+ .relativePerformance = rp});
+ }
+- self->setSupportedFormats(formats);
++ self->setSupportedFormats(self->supportedFormats);
+ });
+ }
+
+diff --git a/src/NVMeSubsys.hpp b/src/NVMeSubsys.hpp
+index 837f48b..f7d4fd9 100644
+--- a/src/NVMeSubsys.hpp
++++ b/src/NVMeSubsys.hpp
+@@ -108,6 +108,13 @@ class NVMeSubsystem :
+ */
+ std::map<uint32_t, std::shared_ptr<NVMeVolume>> volumes;
+
++ /*
++ Current assumption is that there is only one primary controller in the
++ system and only the PC is priveleged to operate NS creation. Otherwise will
++ move the LBAFormat to controllers.
++ */
++ std::vector<LBAFormat> supportedFormats;
++
+ /*
+ * volumes attached to controllers
+ */
+--
+2.43.0.rc0.421.g78406f8d94-goog
+
diff --git a/recipes-phosphor/sensors/dbus-sensors_%.bbappend b/recipes-phosphor/sensors/dbus-sensors_%.bbappend
index 0c6c6c0..0b78e04 100644
--- a/recipes-phosphor/sensors/dbus-sensors_%.bbappend
+++ b/recipes-phosphor/sensors/dbus-sensors_%.bbappend
@@ -90,6 +90,7 @@
file://0019-nvmesensor-print-makeLibNVMeError-for-debug.patch \
file://0020-nvmesensor-add-primary-info-for-plugins.patch \
file://0021-nvmesensor-use-processSecondaryControllerList-to-rep.patch \
+ file://0022-nvmesensor-refactor-createVolume-and-createNamespace.patch \
"
PACKAGECONFIG[nvmesensor] = "-Dnvme=enabled, -Dnvme=disabled, libnvme"
SYSTEMD_SERVICE:${PN} += "${@bb.utils.contains('PACKAGECONFIG', 'nvmesensor', \