| #include "tlbmc/sensors/redfish_aggregated_sensor.h" |
| |
| #include <algorithm> |
| #include <array> |
| #include <chrono> // NOLINT: chrono is commonly used in BMC |
| #include <cstdint> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <utility> |
| |
| #include "absl/functional/any_invocable.h" |
| #include "absl/log/log.h" |
| #include "absl/status/status.h" |
| #include "absl/status/statusor.h" |
| #include "absl/strings/str_cat.h" |
| #include "absl/strings/string_view.h" |
| #include "absl/synchronization/mutex.h" |
| #include "g3/macros.h" |
| #include "nlohmann/json.hpp" |
| #include "nlohmann/json_fwd.hpp" |
| #include "json_utils.h" |
| #include "reading_range_config.pb.h" |
| #include "redfish_aggregated_sensor_config.pb.h" |
| #include "tlbmc/http/http_client_interface.h" |
| #include "resource.pb.h" |
| #include "sensor.pb.h" |
| #include "tlbmc/sensors/sensor.h" |
| #include "tlbmc/time/time.h" |
| #include "google/protobuf/json/json.h" |
| #include "google/protobuf/util/json_util.h" |
| |
| namespace milotic_tlbmc { |
| |
| constexpr auto kSupportedSensorUnits = |
| std::to_array<std::pair<absl::string_view, SensorUnit>>( |
| {{"%", SensorUnit::UNIT_PERCENT}, |
| {"A", SensorUnit::UNIT_AMPERE}, |
| {"Cel", SensorUnit::UNIT_DEGREE_CELSIUS}, |
| {"J", SensorUnit::UNIT_JOULES}, |
| {"Pa", SensorUnit::UNIT_PASCAL}, |
| {"RPM", SensorUnit::UNIT_REVOLUTION_PER_MINUTE}, |
| {"V", SensorUnit::UNIT_VOLT}, |
| {"W", SensorUnit::UNIT_WATT}, |
| {"count", SensorUnit::UNIT_COUNT}}); |
| |
| absl::StatusOr<std::shared_ptr<RedfishAggregatedSensor>> |
| RedfishAggregatedSensor::Create( |
| const RedfishAggregatedSensorConfig& config, |
| const std::shared_ptr<boost::asio::io_context>& io_context, |
| std::unique_ptr<HttpClientInterface> http_client, |
| std::optional<NotificationCb> on_batch_notify) { |
| SensorUnit sensor_unit = ConvertUnitStringToSensorUnit(config.sensor_units()); |
| return std::make_shared<RedfishAggregatedSensor>( |
| Token(), config, config.entity_common_config(), sensor_unit, |
| std::move(http_client), io_context, on_batch_notify); |
| } |
| |
| RedfishAggregatedSensor::RedfishAggregatedSensor( |
| Token token, const RedfishAggregatedSensorConfig& config, |
| const EntityCommonConfig& entity_common_config, SensorUnit sensor_unit, |
| std::unique_ptr<HttpClientInterface> http_client, |
| const std::shared_ptr<boost::asio::io_context>& io_context, |
| std::optional<NotificationCb> on_batch_notify) |
| : Sensor(CreateStaticAttributes(config.name(), sensor_unit, |
| /*i2c_common_config=*/{}, |
| entity_common_config, ReadingRangeConfigs(), |
| /*reading_transform_config=*/{}), |
| /*threshold_configs=*/{}, on_batch_notify), |
| io_context_(io_context), |
| sensor_name_(config.name()), |
| config_(config), |
| http_client_(std::move(http_client)) { |
| State state; |
| state.set_status(STATUS_UNKNOWN); |
| state.set_status_message("Reinitialized, pending sensor refresh"); |
| UpdateState(std::move(state)); |
| } |
| |
| SensorUnit RedfishAggregatedSensor::ConvertUnitStringToSensorUnit( |
| absl::string_view unit_string) { |
| SensorUnit sensor_unit = SensorUnit::UNIT_UNKNOWN; |
| const auto* it = std::lower_bound( |
| kSupportedSensorUnits.begin(), kSupportedSensorUnits.end(), |
| std::pair<std::string_view, SensorUnit>{unit_string, |
| SensorUnit::UNIT_UNKNOWN}, |
| [](const auto& a, const auto& b) { return a.first < b.first; }); |
| if (it != kSupportedSensorUnits.end() && it->first == unit_string) { |
| sensor_unit = it->second; |
| } |
| return sensor_unit; |
| } |
| |
| void RedfishAggregatedSensor::UpdateStateAndTimestamp( |
| Status status, std::string_view status_message) { |
| State state; |
| state.set_status(status); |
| state.set_status_message(status_message); |
| UpdateState(std::move(state)); |
| if (callback_) { |
| callback_(GetSensorData()); |
| } |
| std::chrono::steady_clock::time_point last_refresh_end_time = |
| GetLastRefreshEndTime(); |
| if (last_refresh_end_time != std::chrono::steady_clock::time_point::min()) { |
| UpdateSoftwarePollingEndMetrics(std::chrono::steady_clock::now() - |
| last_refresh_end_time); |
| } |
| SetLastRefreshEndTime(std::chrono::steady_clock::now()); |
| } |
| |
| void RedfishAggregatedSensor::HandleScanCompletion( |
| boost::beast::error_code ec, |
| boost::beast::http::response<boost::beast::http::string_body> res) { |
| UpdateAggregatedPollingMetrics(std::chrono::steady_clock::now() - |
| aggregated_polling_start_time_); |
| aggregated_polling_start_time_ = std::chrono::steady_clock::time_point::min(); |
| |
| // For aggregated sensors, we expect to get a valid response with an empty |
| // error code,a 2xx status code and a non-empty body. |
| absl::Status response_status = absl::OkStatus(); |
| if (ec) { |
| // Check for error code. |
| response_status = absl::InternalError( |
| absl::StrCat("Failed to read sensor value with error: ", ec.message())); |
| } else if (res.result_int() < 200 || res.result_int() >= 300) { |
| // Check for response code. |
| response_status = absl::InternalError( |
| absl::StrCat("Failed to read sensor value with error: Bad response " |
| "code ", |
| res.result_int())); |
| } else if (res.body().empty()) { |
| // Check for empty body. |
| response_status = absl::InternalError( |
| "Failed to read sensor value with error: Empty body"); |
| } |
| |
| // Update the state and timestamp if the response is not valid. |
| if (!response_status.ok()) { |
| UpdateStateAndTimestamp(STATUS_STALE, response_status.message()); |
| return; |
| } |
| |
| const std::string& json_string = res.body(); |
| nlohmann::json sensor_json = nlohmann::json::parse(json_string); |
| if (!sensor_json.contains("Reading")) { |
| // Check for "Reading" field in the response body. |
| UpdateStateAndTimestamp( |
| STATUS_STALE, |
| absl::StrCat("Failed to read sensor value with error: Invalid " |
| "response body: ", |
| json_string)); |
| return; |
| } |
| |
| std::optional<double> sensor_value = |
| milotic::authz::GetValueAsDoubleFromFloatOrInteger(sensor_json, |
| "Reading"); |
| |
| // Readings from GPU sensors can be null. |
| if (!sensor_value.has_value()) { |
| UpdateStateAndTimestamp( |
| STATUS_STALE, "Failed to read sensor value with error: Null value"); |
| return; |
| } |
| |
| SensorValue sensor_data; |
| // milliseconds to timestamp |
| *sensor_data.mutable_timestamp() = Now(); |
| sensor_data.set_reading(sensor_value.value()); |
| DLOG(INFO) << "RedfishAggregatedSensor::RefreshOnceAsync success: " |
| << sensor_name_ << " " << sensor_data.reading(); |
| StoreSensorData(std::make_shared<const SensorValue>(std::move(sensor_data))); |
| UpdateStateAndTimestamp(STATUS_READY, "Successfully read sensor value"); |
| } |
| |
| void RedfishAggregatedSensor::RefreshOnceAsync( |
| absl::AnyInvocable<void(const std::shared_ptr<const SensorValue>&)> |
| callback) { |
| std::weak_ptr<RedfishAggregatedSensor> self = shared_from_this(); |
| callback_ = std::move(callback); |
| boost::asio::post(*io_context_, [self(std::move(self))]() mutable { |
| std::shared_ptr<RedfishAggregatedSensor> sensor = self.lock(); |
| if (!sensor) { |
| LOG(WARNING) << "Sensor is destroyed; cancel the refresh."; |
| return; |
| } |
| |
| std::chrono::steady_clock::time_point last_refresh_start_time = |
| sensor->GetLastRefreshStartTime(); |
| if (last_refresh_start_time != |
| std::chrono::steady_clock::time_point::min()) { |
| sensor->UpdateSoftwarePollingStartMetrics( |
| std::chrono::steady_clock::now() - last_refresh_start_time); |
| } |
| sensor->SetLastRefreshStartTime(std::chrono::steady_clock::now()); |
| |
| // Get the chassis name from the board config key by removing the aggregated |
| // prefix. The aggregated prefix is the common prefix for all aggregated |
| // Chassis but if we use go through HMC, we need to remove the prefix. |
| std::string chassis_name = |
| sensor->config_.entity_common_config().board_config_key().substr( |
| sensor->config_.aggregation_prefix().size()); |
| std::string target = absl::StrCat("/redfish/v1/Chassis/", chassis_name, |
| "/Sensors/", sensor->config_.name()); |
| absl::string_view host = sensor->config_.source(); // HMC_0 |
| absl::string_view port = "80"; |
| |
| // We set the 'sensor->aggregated_polling_start_time_' to |
| // 'time_point::min()' after every update so this value does not get |
| // overwritten by next RefreshOnceAsync call while the async http request is |
| // in progress. |
| if (sensor->aggregated_polling_start_time_ == |
| std::chrono::steady_clock::time_point::min()) { |
| sensor->aggregated_polling_start_time_ = std::chrono::steady_clock::now(); |
| } |
| |
| std::weak_ptr<RedfishAggregatedSensor> weak_sensor = sensor; |
| // Send an async request to the HMC. |
| absl::Status request_status = sensor->http_client_->SendRequest( |
| *(sensor->io_context_), host, port, target, |
| [weak_sensor]( |
| boost::beast::error_code ec, |
| boost::beast::http::response<boost::beast::http::string_body> res) { |
| std::shared_ptr<RedfishAggregatedSensor> locked_weak_sensor = |
| weak_sensor.lock(); |
| if (!locked_weak_sensor) { |
| LOG(WARNING) |
| << "Sensor destroyed before HandleScanCompletion could run."; |
| return; |
| } |
| |
| locked_weak_sensor->HandleScanCompletion(ec, res); |
| }); |
| if (!request_status.ok()) { |
| sensor->UpdateAggregatedPollingMetrics( |
| std::chrono::steady_clock::now() - |
| sensor->aggregated_polling_start_time_); |
| sensor->aggregated_polling_start_time_ = |
| std::chrono::steady_clock::time_point::min(); |
| sensor->UpdateStateAndTimestamp( |
| STATUS_STALE, absl::StrCat("Failed to read sensor value with error: ", |
| request_status.message())); |
| DLOG(INFO) << "RedfishAggregatedSensor::RefreshOnceAsync failed: " |
| << sensor->sensor_name_; |
| } |
| }); |
| } |
| |
| google::protobuf::Duration GetDurationFromMilliseconds(uint64_t milliseconds) { |
| google::protobuf::Duration duration; |
| duration.set_seconds(static_cast<int64_t>(milliseconds / 1000)); |
| duration.set_nanos(static_cast<int32_t>(milliseconds % 1000) * 1000000); |
| return duration; |
| } |
| |
| void RedfishAggregatedSensor::UpdateAggregatedPollingMetrics( |
| std::chrono::steady_clock::duration interval) { |
| // NOLINTNEXTLINE: Yocto's Abseil doesn't support non-pointer constructor. |
| absl::MutexLock lock(sensor_metrics_mutex_); |
| // At least increase the interval by 1ms to avoid total interval being zero. |
| if (interval < std::chrono::milliseconds(1)) { |
| interval = std::chrono::milliseconds(1); |
| } |
| total_aggregated_polling_interval_ms_ += |
| static_cast<uint64_t>(interval.count() / 1000000); |
| total_aggregated_read_cnt_++; |
| } |
| |
| SensorMetrics RedfishAggregatedSensor::GetSensorMetrics() const { |
| // NOLINTNEXTLINE: Yocto's Abseil doesn't support non-pointer constructor. |
| absl::MutexLock lock(sensor_metrics_mutex_); |
| SensorMetrics metrics; |
| // Check for aggregated read count instead of hardware read count because |
| // hardware read count does not apply to aggregated sensors. |
| if (total_aggregated_read_cnt_ == 0 || |
| total_software_polling_start_cnt_ == 0 || |
| total_software_polling_end_cnt_ == 0) { |
| metrics.mutable_average_hardware_polling_latency()->set_nanos(0); |
| metrics.mutable_average_software_polling_start_interval()->set_nanos(0); |
| metrics.mutable_average_software_polling_end_interval()->set_nanos(0); |
| return metrics; |
| } |
| |
| // Always set average_hardware_polling_latency to 0 for aggregated sensors. |
| *metrics.mutable_average_hardware_polling_latency() = |
| GetDurationFromMilliseconds(0); |
| |
| uint64_t average_software_polling_start_interval_ms = |
| total_software_polling_start_interval_ms_ / |
| total_software_polling_start_cnt_; |
| *metrics.mutable_average_software_polling_start_interval() = |
| GetDurationFromMilliseconds(average_software_polling_start_interval_ms); |
| |
| uint64_t average_software_polling_end_interval_ms = |
| total_software_polling_end_interval_ms_ / total_software_polling_end_cnt_; |
| *metrics.mutable_average_software_polling_end_interval() = |
| GetDurationFromMilliseconds(average_software_polling_end_interval_ms); |
| return metrics; |
| } |
| |
| void RedfishAggregatedSensor::ResetMetrics() { |
| Sensor::ResetMetrics(); |
| // NOLINTNEXTLINE: Yocto's Abseil doesn't support non-pointer constructor. |
| absl::MutexLock lock(sensor_metrics_mutex_); |
| total_aggregated_polling_interval_ms_ = 0; |
| total_aggregated_read_cnt_ = 0; |
| } |
| |
| absl::StatusOr<nlohmann::json> RedfishAggregatedSensor::GetMetricsAsJson( |
| const ::google::protobuf::util::JsonPrintOptions& options) const { |
| std::string base_metrics_json; |
| ECCLESIA_RETURN_IF_ERROR(::google::protobuf::json::MessageToJsonString( |
| GetSensorMetrics(), &base_metrics_json, options)); |
| |
| nlohmann::json metrics_json = |
| nlohmann::json::parse(base_metrics_json, nullptr, false); |
| |
| // Remove the average_hardware_polling_latency metric from the metrics json |
| // because it is not applicable to the aggregated sensors. |
| metrics_json.erase("average_hardware_polling_latency"); |
| |
| // Calculate the average aggregated polling interval and add it to the metrics |
| // json. |
| uint64_t average_aggregated_polling_interval_ms = 0; |
| { |
| // NOLINTNEXTLINE: Yocto's Abseil doesn't support non-pointer constructor. |
| absl::MutexLock lock(sensor_metrics_mutex_); |
| if (total_aggregated_read_cnt_ > 0 && total_software_polling_end_cnt_ > 0 && |
| total_software_polling_start_cnt_ > 0) { |
| average_aggregated_polling_interval_ms = |
| total_aggregated_polling_interval_ms_ / total_aggregated_read_cnt_; |
| } |
| } |
| google::protobuf::Duration duration = |
| GetDurationFromMilliseconds(average_aggregated_polling_interval_ms); |
| std::string duration_json; |
| if (!::google::protobuf::json::MessageToJsonString(duration, &duration_json, options) |
| .ok()) { |
| return absl::InternalError("Failed to serialize Duration proto."); |
| } |
| metrics_json["average_aggregated_polling_time"] = |
| nlohmann::json::parse(duration_json); |
| |
| return metrics_json; |
| } |
| |
| } // namespace milotic_tlbmc |