NVMeMI: Flush the MI tasks before resetting MCTP conn
Previously, when the stop will call. We will immediately
transition into RESET state and free up EP resources. This
would result in SEGV if the worker still have remaining tasks.
With the new change, when the stop is called. We will first
transition into terminating state which will stop from further
tasks to be queued and to flush the currently tasks already
queued.
Google-Bug-Id: 372696551
Change-Id: Ia46d1a93e6533eccfe34b6b6fae4e1d9152e7500
Signed-off-by: Muhammad Usama <muhammadusama@google.com>
diff --git a/src/NVMeMi.cpp b/src/NVMeMi.cpp
index 775e6fd..864c3dc 100644
--- a/src/NVMeMi.cpp
+++ b/src/NVMeMi.cpp
@@ -27,7 +27,7 @@
int addr, bool singleThreadMode, PowerState readState) :
io(io), conn(conn), dbus(*conn.get()), bus(bus), addr(addr),
readState(readState), nvmeEP(nullptr), nid(-1), eid(0), mtu(64),
- optimizeTimer(nullptr), mctpStatus(Status::Reset)
+ restart(false), optimizeTimer(nullptr), mctpStatus(Status::Reset)
{
// set update the worker thread
if (!nvmeRoot)
@@ -81,16 +81,15 @@
void NVMeMi::start()
{
- // Already in start loop
- if (optimizeTimer)
+ if (mctpStatus == Status::Terminating)
{
+ restart = true;
return;
}
if (mctpStatus == Status::Reset)
{
// init mctp ep via mctpd
- std::unique_lock<std::mutex> lock(mctpMtx);
std::cerr << "[bus: " << bus << ", addr: " << addr << "]"
<< "start MCTP initialization" << std::endl;
int i = 0;
@@ -198,7 +197,8 @@
void NVMeMi::stop()
{
- if (mctpStatus == Status::Reset)
+ restart = false;
+ if (mctpStatus == Status::Reset || mctpStatus == Status::Terminating)
{
return;
}
@@ -211,40 +211,48 @@
optimizeTimer->cancel();
optimizeTimer = nullptr;
}
- // each nvme mi message transaction should take relatively short time
- // (typically <= 200 ms). So the blocking time should be short
- std::unique_lock<std::mutex> lock(mctpMtx);
+ mctpStatus = Status::Terminating;
std::cerr << "[bus: " << bus << ", addr: " << addr << "]"
<< "start MCTP closure" << std::endl;
- nvme_mi_close(nvmeEP);
-
- // Note: No need to remove MCTP ep from MCTPd since the routing table will
- // re-establish on the next init
-
- mctpStatus = Status::Reset;
- nid = -1;
- eid = 0;
- mtu = 64;
- mctpPath.erase();
- nvmeEP = nullptr;
- std::cerr << "[bus: " << bus << ", addr: " << addr << "]"
- << "finish MCTP closure." << std::endl;
-}
-
-bool NVMeMi::isMCTPconnect() const
-{
- return mctpStatus == Status::Connected;
+ // Invoke nvme_mi_close() via a lambda that we schedule via
+ // flushOperations(). Using flushOperations() ensures that any
+ // outstanding tasks are executed before nvme_mi_close() is invoked,
+ // invalidating their controller reference.
+ flushOperations([self{shared_from_this()}]() {
+ nvme_mi_close(self->nvmeEP);
+ self->mtu = 64;
+ self->nid = -1;
+ self->eid = 0;
+ self->mctpPath.erase();
+ self->nvmeEP = nullptr;
+ self->mctpStatus = Status::Reset;
+ std::cerr << "[bus: " << self->bus << ", addr: " << self->addr << "]"
+ << "end MCTP closure" << std::endl;
+ if (self->restart)
+ {
+ // If restart is true then we've received update Power
+ // signal while in terminating loop.
+ self->restart = false;
+ self->start();
+ }
+ });
}
std::optional<std::error_code> NVMeMi::isEndpointDegraded() const
{
- if (!isMCTPconnect())
+ switch (mctpStatus)
{
- return std::make_error_code(std::errc::no_such_device);
+ case Status::Reset:
+ return std::make_error_code(std::errc::no_such_device);
+ case Status::Initiated:
+ return std::make_error_code(std::errc::not_connected);
+ case Status::Connected:
+ return std::nullopt;
+ case Status::Terminating:
+ return std::make_error_code(std::errc::not_connected);
}
-
- return std::nullopt;
+ throw std::logic_error("Unreachable");
}
NVMeMi::Worker::Worker()
@@ -309,11 +317,7 @@
void NVMeMi::post(std::function<void(void)>&& func)
{
- worker->post(
- [self{std::move(shared_from_this())}, func{std::move(func)}]() {
- std::unique_lock<std::mutex> lock(self->mctpMtx);
- func();
- });
+ worker->post([func{std::move(func)}]() { func(); });
}
// Calls .post(), catching runtime_error and returning an error code on failure.
@@ -337,7 +341,7 @@
uint8_t port_id, uint8_t max_supported_freq,
std::function<void(const std::error_code&)>&& cb)
{
- if (mctpStatus == Status::Reset)
+ if (mctpStatus == Status::Reset || mctpStatus == Status::Terminating)
{
std::cerr << "[bus: " << bus << ", addr: " << addr
<< ", eid: " << static_cast<int>(eid) << "]"
@@ -402,7 +406,7 @@
uint8_t port, uint16_t mtu, uint8_t max_supported_freq,
std::function<void(const std::error_code&)>&& cb)
{
- if (mctpStatus == Status::Reset)
+ if (mctpStatus == Status::Reset || mctpStatus == Status::Terminating)
{
std::cerr << "[bus: " << bus << ", addr: " << addr
<< ", eid: " << static_cast<int>(eid) << "]"
@@ -459,7 +463,7 @@
void NVMeMi::miSetMCTPConfiguration(
std::function<void(const std::error_code&)>&& cb)
{
- if (mctpStatus == Status::Reset)
+ if (mctpStatus == Status::Reset || mctpStatus == Status::Terminating)
{
std::cerr << "[bus: " << bus << ", addr: " << addr
<< ", eid: " << static_cast<int>(eid) << "]"
diff --git a/src/NVMeMi.hpp b/src/NVMeMi.hpp
index 6dca9e0..9da153c 100644
--- a/src/NVMeMi.hpp
+++ b/src/NVMeMi.hpp
@@ -127,8 +127,6 @@
uint16_t mtu;
std::string mctpPath;
- std::mutex mctpMtx;
-
// A worker thread for calling NVMeMI cmd.
class Worker
{
@@ -147,19 +145,26 @@
void post(std::function<void(void)>&& func);
};
- // A state machine to represent the current status of the MCTP connection.
- // In Reset state, the MCTP endpoint (EP) is not setup with the device. In
- // event of successful setup and opening of EP, the status will change to
- // Initiated. The status will change to Connected, once the MTU of local and
- // device side MTU and frequency is changed. In an event of connection EP
- // closure, the status will move back to Reset.
+ /*
+ * A state machine to represent the current status of the MCTP connection.
+ * In Reset state, the MCTP endpoint (EP) is not setup with the device.
+ * In event of successful setup and opening the EP is successful from
+ * the status will change to Initiated. The status will change to Connected
+ * once the MTU of local and device side MTU and frequency is optimized.
+ * In an event of connection EP closure, the status will move back to Reset
+ * via Terminating.
+ */
+
enum class Status
{
Reset,
Initiated,
Connected,
+ Terminating,
};
+ // Handle a start() while in Status::Terminating on entry to Status::Reset.
+ bool restart;
std::shared_ptr<boost::asio::steady_timer> optimizeTimer;
Status mctpStatus;
@@ -191,8 +196,6 @@
int configureLocalRouteMtu();
- bool isMCTPconnect() const;
-
std::optional<std::error_code> isEndpointDegraded() const;
bool readingStateGood() const