Make sure tlbmc sensors are dynamic

Certain sensors, e.g., PSU sensors, can be deleted and then recreated when power is restored. tlbmc used to keep the sensor unchanged, which resulted in a stale input file descriptor.

This commit adds a temporary solution that recreates the input device when the old device returns "not found". An ideal solution would use IPC to track host power or integrate host power (GPIO monitoring) into tlbmc, which is a goal for the next iteration.

Also add the sensor device path to the Redfish error message for debuggability.

On a real machine:

```
# device rejoin
root@dddhh9-nfd01:~# echo adm1272 > /sys/bus/i2c/devices/i2c-30/new_device 0x1f
root@dddhh9-nfd01:~# ls /sys/bus/i2c/devices/i2c-30/30-001f/
driver     hwmon      modalias   name       power      subsystem  uevent
root@dddhh9-nfd01:~# ls /sys/bus/i2c/devices/i2c-30/30-001f/hwmon/
hwmon9
root@dddhh9-nfd01:~# ls /sys/bus/i2c/devices/i2c-30/30-001f/hwmon/hwmon9/
curr1_highest         in1_highest           in2_highest           in_samples            power1_max            temp1_max
curr1_input           in1_input             in2_input             name                  power1_reset_history  temp1_max_alarm
curr1_label           in1_label             in2_label             of_node               power_samples         temp1_reset_history
curr1_max             in1_max               in2_max               power                 subsystem             uevent
curr1_max_alarm       in1_max_alarm         in2_max_alarm         power1_alarm          temp1_crit
curr1_reset_history   in1_min               in2_min               power1_input          temp1_crit_alarm
curr_samples          in1_min_alarm         in2_min_alarm         power1_input_highest  temp1_highest
device                in1_reset_history     in2_reset_history     power1_label          temp1_input

```

Before this change,
```
root@dddhh9-nfd01:~# curl localhost:18080/redfish/v1/Chassis/BigGulp_1/Sensors/power_hotswap_in_Input_Power
{
  "error": {
    "message": "Failed to read from input device: No such device; input device path: /sys/bus/i2c/devices/i2c-30/30-001f/hwmon/hwmon9/power1_input"
  }
```

After this change:
```
root@dddhh9-nfd01:~# curl localhost:18080/redfish/v1/Chassis/BigGulp_1/Sensors/power_hotswap_in_Input_Power
{
  "@odata.id": "/redfish/v1/Chassis/BigGulp_1/Sensors/power_hotswap_in_Input_Power",
  "@odata.type": "#Sensor.v1_2_0.Sensor",
  "Description": "Sensor",
  "Id": "power_hotswap_in_Input_Power",
  "Name": "hotswap in Input Power",
  "Reading": 22.306597,
  "ReadingRangeMax": 4700.0,
  "ReadingRangeMin": -11.0,
  "ReadingType": "Power",
  "ReadingUnits": "W",
  "RelatedItem": [
    {
      "@odata.id": "/redfish/v1/Chassis/BigGulp_1"
    }
  ],
  "Status": {
    "Health": "OK",
    "State": "Enabled"
  },
  "Thresholds": {
    "LowerCritical": {
      "Reading": -10.0
    },
    "UpperCritical": {
      "Reading": 4600.0
    }
  }
}
```

PiperOrigin-RevId: 744088791
Change-Id: I92b52d410ba24b9a264963b2e62e5597f21ea96a
diff --git a/tlbmc/redfish/response.cc b/tlbmc/redfish/response.cc
index 8e46f7b..e03ec21 100644
--- a/tlbmc/redfish/response.cc
+++ b/tlbmc/redfish/response.cc
@@ -92,6 +92,11 @@
   http_response_.result(boost::beast::http::status::service_unavailable);
 }
 
+void RedfishResponse::SetToNotReady(std::string_view error_message) {
+  SetToNotReady();  // Reuse the no-argument overload, then attach the message.
+  SetKeyInJsonBody("/error/message", error_message);
+}
+
 void RedfishResponse::SetToAbslStatus(const absl::Status& status) {
   switch (status.code()) {
     case absl::StatusCode::kNotFound:
diff --git a/tlbmc/redfish/response.h b/tlbmc/redfish/response.h
index 5541eb0..38d03c8 100644
--- a/tlbmc/redfish/response.h
+++ b/tlbmc/redfish/response.h
@@ -60,6 +60,8 @@
 
   void SetToNotReady();
 
+  void SetToNotReady(std::string_view error_message);
+
   std::string DebugString() const { return absl::StrCat(*grpc_response_); }
 
   const nlohmann::json& GetJsonBody() const { return json_body_; }
diff --git a/tlbmc/redfish/routes/sensor.cc b/tlbmc/redfish/routes/sensor.cc
index fc4d22a..3277c96 100644
--- a/tlbmc/redfish/routes/sensor.cc
+++ b/tlbmc/redfish/routes/sensor.cc
@@ -86,7 +86,8 @@
     const std::shared_ptr<const Sensor>& sensor,
     const nlohmann::json::json_pointer& sensor_pointer, RedfishResponse& resp) {
   if (sensor->GetSensorAttributesDynamic().state().status() != STATUS_READY) {
-    resp.SetToNotReady();
+    resp.SetToNotReady(
+        sensor->GetSensorAttributesDynamic().state().status_message());
     return;
   }
   const SensorAttributesStatic& attributes_static =
diff --git a/tlbmc/sensors/fan_pwm.cc b/tlbmc/sensors/fan_pwm.cc
index cd08bbc..30e6aac 100644
--- a/tlbmc/sensors/fan_pwm.cc
+++ b/tlbmc/sensors/fan_pwm.cc
@@ -95,8 +95,9 @@
   if (error) {
     State state;
     state.set_status(STATUS_STALE);
-    state.set_status_message(
-        absl::StrCat("Failed to read from input device: ", error.message()));
+    state.set_status_message(absl::Substitute(
+        "Failed to read from input device: $0; input device path: $1",
+        error.message(), GetInputDevicePath()));
     UpdateState(std::move(state));
     return;
   }
diff --git a/tlbmc/sensors/fan_tach.cc b/tlbmc/sensors/fan_tach.cc
index 2f67e96..139a25b 100644
--- a/tlbmc/sensors/fan_tach.cc
+++ b/tlbmc/sensors/fan_tach.cc
@@ -71,8 +71,9 @@
   if (error) {
     State state;
     state.set_status(STATUS_STALE);
-    state.set_status_message(
-        absl::StrCat("Failed to read from input device: ", error.message()));
+    state.set_status_message(absl::Substitute(
+        "Failed to read from input device: $0; input device path: $1",
+        error.message(), GetInputDevicePath()));
     UpdateState(std::move(state));
     return;
   }
diff --git a/tlbmc/sensors/hwmon_temp_sensor.cc b/tlbmc/sensors/hwmon_temp_sensor.cc
index 97dba01..5a65176 100644
--- a/tlbmc/sensors/hwmon_temp_sensor.cc
+++ b/tlbmc/sensors/hwmon_temp_sensor.cc
@@ -160,8 +160,9 @@
   if (error) {
     State state;
     state.set_status(STATUS_STALE);
-    state.set_status_message(
-        absl::StrCat("Failed to read from input device: ", error.message()));
+    state.set_status_message(absl::Substitute(
+        "Failed to read from input device: $0; input device path: $1",
+        error.message(), GetInputDevicePath()));
     UpdateState(std::move(state));
     return;
   }
diff --git a/tlbmc/sensors/i2c_hwmon_based_sensor.cc b/tlbmc/sensors/i2c_hwmon_based_sensor.cc
index 53d722b..02aa443 100644
--- a/tlbmc/sensors/i2c_hwmon_based_sensor.cc
+++ b/tlbmc/sensors/i2c_hwmon_based_sensor.cc
@@ -3,6 +3,7 @@
 #include <array>
 #include <cstddef>
 #include <exception>
+#include <filesystem>  // NOLINT
 #include <fstream>
 #include <ios>
 #include <memory>
@@ -19,6 +20,7 @@
 #include "absl/strings/match.h"
 #include "absl/strings/substitute.h"
 #include "boost/asio.hpp"  // NOLINT: boost::asio is commonly used in BMC
+#include "boost/asio/error.hpp"
 #include "boost/asio/random_access_file.hpp"  // NOLINT: boost::asio is commonly used in BMC
 #include "boost/filesystem.hpp"  // NOLINT: boost::filesystem is commonly used in BMC
 #include "boost/filesystem/operations.hpp"
@@ -34,6 +36,46 @@
 
 namespace milotic_tlbmc {
 
+void I2cHwmonBasedSensor::SetUpInput() {
+  // Re-locates the hwmon folder of the input file (e.g.,
+  // /sys/bus/i2c/devices/i2c-35/35-005c/hwmon/hwmon0/temp1_input) and reopens
+  // it. Assumes the label and device mapping do not change after recreation.
+  // TODO(nanzhou): This logic should eventually become a power-signal-based
+  // device deletion and recreation.
+  boost::filesystem::path input_dev_path(input_dev_path_);
+  bool found_hwmon = false;
+  boost::filesystem::path hwmon_folder =
+      input_dev_path.parent_path().parent_path();  // e.g., .../35-005c/hwmon
+  boost::system::error_code error;
+  for (const auto& entry :
+       boost::filesystem::directory_iterator(hwmon_folder, error)) {
+    if (error) {
+      break;
+    }
+    // Take the first entry named hwmon* (e.g., hwmon10) as the new hwmon dir.
+    if (absl::StartsWith(entry.path().filename().string(), "hwmon")) {
+      input_dev_path_ = (entry.path() / input_dev_path.filename()).string();
+      found_hwmon = true;
+      break;
+    }
+  }
+  if (!found_hwmon) {
+    return;  // No hwmon entry found; input path and handles stay unchanged.
+  }
+
+  try {
+    input_device_ = std::make_shared<boost::asio::random_access_file>(
+        *io_context_, input_dev_path_,
+        boost::asio::random_access_file::read_only);
+  } catch (const std::exception& e) {
+    input_device_ = nullptr;
+    LOG(WARNING) << "Failed to create io_uring based random_access_file for "
+                 << input_dev_path_ << ": " << e.what()
+                 << " . Fall back to ifstream.";
+    input_file_ = std::ifstream(input_dev_path_);
+  }
+}
+
 I2cHwmonBasedSensor::I2cHwmonBasedSensor(
     const std::string& input_dev_path,
     const std::shared_ptr<boost::asio::io_context>& io_context,
@@ -131,6 +173,17 @@
                       "usable; cancel the refresh";
       return;
     }
+    // The device might be deleted. E.g., the endpoint device lost
+    // power. In this case, we will try to set up the device again and
+    // bypass the refresh.
+    if (!boost::filesystem::exists(sensor->GetInputDevicePath())) {
+      sensor->SetUpInput();
+      sensor->HandleRefreshResult(boost::asio::error::no_such_device, 0);
+      if (callback) {
+        callback(sensor->GetSensorData());
+      }
+      return;
+    }
     if (sensor->GetInputDevice() != nullptr) {
       // These lines is covered on machines that have io_uring support, e.g.,
       // workstation. Add "--test_strategy=local" to your blaze command.
@@ -145,6 +198,10 @@
             if (!sensor) {
               return;
             }
+            // If the device is deleted, we will try to set up the device again
+            if (error == boost::asio::error::no_such_device) {
+              sensor->SetUpInput();
+            }
             sensor->HandleRefreshResult(error, bytes_read);
             if (callback) {
               callback(sensor->GetSensorData());
diff --git a/tlbmc/sensors/i2c_hwmon_based_sensor.h b/tlbmc/sensors/i2c_hwmon_based_sensor.h
index b2461a7..b1eb4c6 100644
--- a/tlbmc/sensors/i2c_hwmon_based_sensor.h
+++ b/tlbmc/sensors/i2c_hwmon_based_sensor.h
@@ -68,6 +68,10 @@
 
   std::string GetInputDevicePath() const { return input_dev_path_; }
 
+  // Sets up the input device. This function shall be called on the
+  // `io_context_` thread or at construction time.
+  void SetUpInput();
+
   // Descendants shall implement this function to handle the buffer that reads
   // from the input device. This function will be called on the `io_context_`
   // thread.
@@ -78,7 +82,7 @@
   static absl::StatusOr<SensorUnit> GetSensorUnit(std::string_view input_file);
 
  private:
-  const std::string input_dev_path_;
+  std::string input_dev_path_;
   std::shared_ptr<boost::asio::io_context> io_context_;
   // The buffer to read the value from.
   std::array<char, 128> read_buffer_ = {};
diff --git a/tlbmc/sensors/psu_sensor.cc b/tlbmc/sensors/psu_sensor.cc
index 677e640..f160154 100644
--- a/tlbmc/sensors/psu_sensor.cc
+++ b/tlbmc/sensors/psu_sensor.cc
@@ -284,8 +284,7 @@
       // If sensor label name is not customized in the config, create key from
       // prefix of sensor Name field and suffix of label_type_name
       // https://github.com/openbmc/dbus-sensors/blob/556e04b8f374a9eb8cf32bf0e36ac46c14873eba/src/psu/PSUSensorMain.cpp#L934
-      sensor_key =
-          absl::StrCat(config.name(), "_", properties.label_type_name);
+      sensor_key = absl::StrCat(config.name(), "_", properties.label_type_name);
       std::replace(sensor_key.begin(), sensor_key.end(), ' ', '_');
     }
     switch (sensor_unit) {
@@ -320,8 +319,9 @@
   if (error) {
     State state;
     state.set_status(STATUS_STALE);
-    state.set_status_message(
-        absl::StrCat("Failed to read from input device: ", error.message()));
+    state.set_status_message(absl::Substitute(
+        "Failed to read from input device: $0; input device path: $1",
+        error.message(), GetInputDevicePath()));
     UpdateState(std::move(state));
     return;
   }