blob: 678737ef9ea3b670871b05d26e766a252ac79c25 [file] [log] [blame] [edit]
#include "tlbmc/sensors/redfish_aggregated_sensor.h"
#include <algorithm>
#include <array>
#include <chrono> // NOLINT: chrono is commonly used in BMC
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include "absl/functional/any_invocable.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/synchronization/mutex.h"
#include "g3/macros.h"
#include "nlohmann/json.hpp"
#include "nlohmann/json_fwd.hpp"
#include "json_utils.h"
#include "reading_range_config.pb.h"
#include "redfish_aggregated_sensor_config.pb.h"
#include "tlbmc/http/http_client_interface.h"
#include "resource.pb.h"
#include "sensor.pb.h"
#include "tlbmc/sensors/sensor.h"
#include "tlbmc/time/time.h"
#include "google/protobuf/json/json.h"
#include "google/protobuf/util/json_util.h"
namespace milotic_tlbmc {
constexpr auto kSupportedSensorUnits =
std::to_array<std::pair<absl::string_view, SensorUnit>>(
{{"%", SensorUnit::UNIT_PERCENT},
{"A", SensorUnit::UNIT_AMPERE},
{"Cel", SensorUnit::UNIT_DEGREE_CELSIUS},
{"J", SensorUnit::UNIT_JOULES},
{"Pa", SensorUnit::UNIT_PASCAL},
{"RPM", SensorUnit::UNIT_REVOLUTION_PER_MINUTE},
{"V", SensorUnit::UNIT_VOLT},
{"W", SensorUnit::UNIT_WATT},
{"count", SensorUnit::UNIT_COUNT}});
absl::StatusOr<std::shared_ptr<RedfishAggregatedSensor>>
RedfishAggregatedSensor::Create(
const RedfishAggregatedSensorConfig& config,
const std::shared_ptr<boost::asio::io_context>& io_context,
std::unique_ptr<HttpClientInterface> http_client,
std::optional<NotificationCb> on_batch_notify) {
SensorUnit sensor_unit = ConvertUnitStringToSensorUnit(config.sensor_units());
return std::make_shared<RedfishAggregatedSensor>(
Token(), config, config.entity_common_config(), sensor_unit,
std::move(http_client), io_context, on_batch_notify);
}
RedfishAggregatedSensor::RedfishAggregatedSensor(
Token token, const RedfishAggregatedSensorConfig& config,
const EntityCommonConfig& entity_common_config, SensorUnit sensor_unit,
std::unique_ptr<HttpClientInterface> http_client,
const std::shared_ptr<boost::asio::io_context>& io_context,
std::optional<NotificationCb> on_batch_notify)
: Sensor(CreateStaticAttributes(config.name(), sensor_unit,
/*i2c_common_config=*/{},
entity_common_config, ReadingRangeConfigs(),
/*reading_transform_config=*/{}),
/*threshold_configs=*/{}, on_batch_notify),
io_context_(io_context),
sensor_name_(config.name()),
config_(config),
http_client_(std::move(http_client)) {
State state;
state.set_status(STATUS_UNKNOWN);
state.set_status_message("Reinitialized, pending sensor refresh");
UpdateState(std::move(state));
}
SensorUnit RedfishAggregatedSensor::ConvertUnitStringToSensorUnit(
absl::string_view unit_string) {
SensorUnit sensor_unit = SensorUnit::UNIT_UNKNOWN;
const auto* it = std::lower_bound(
kSupportedSensorUnits.begin(), kSupportedSensorUnits.end(),
std::pair<std::string_view, SensorUnit>{unit_string,
SensorUnit::UNIT_UNKNOWN},
[](const auto& a, const auto& b) { return a.first < b.first; });
if (it != kSupportedSensorUnits.end() && it->first == unit_string) {
sensor_unit = it->second;
}
return sensor_unit;
}
void RedfishAggregatedSensor::UpdateStateAndTimestamp(
Status status, std::string_view status_message) {
State state;
state.set_status(status);
state.set_status_message(status_message);
UpdateState(std::move(state));
if (callback_) {
callback_(GetSensorData());
}
std::chrono::steady_clock::time_point last_refresh_end_time =
GetLastRefreshEndTime();
if (last_refresh_end_time != std::chrono::steady_clock::time_point::min()) {
UpdateSoftwarePollingEndMetrics(std::chrono::steady_clock::now() -
last_refresh_end_time);
}
SetLastRefreshEndTime(std::chrono::steady_clock::now());
}
void RedfishAggregatedSensor::HandleScanCompletion(
boost::beast::error_code ec,
boost::beast::http::response<boost::beast::http::string_body> res) {
UpdateAggregatedPollingMetrics(std::chrono::steady_clock::now() -
aggregated_polling_start_time_);
aggregated_polling_start_time_ = std::chrono::steady_clock::time_point::min();
// For aggregated sensors, we expect to get a valid response with an empty
// error code,a 2xx status code and a non-empty body.
absl::Status response_status = absl::OkStatus();
if (ec) {
// Check for error code.
response_status = absl::InternalError(
absl::StrCat("Failed to read sensor value with error: ", ec.message()));
} else if (res.result_int() < 200 || res.result_int() >= 300) {
// Check for response code.
response_status = absl::InternalError(
absl::StrCat("Failed to read sensor value with error: Bad response "
"code ",
res.result_int()));
} else if (res.body().empty()) {
// Check for empty body.
response_status = absl::InternalError(
"Failed to read sensor value with error: Empty body");
}
// Update the state and timestamp if the response is not valid.
if (!response_status.ok()) {
UpdateStateAndTimestamp(STATUS_STALE, response_status.message());
return;
}
const std::string& json_string = res.body();
nlohmann::json sensor_json = nlohmann::json::parse(json_string);
if (!sensor_json.contains("Reading")) {
// Check for "Reading" field in the response body.
UpdateStateAndTimestamp(
STATUS_STALE,
absl::StrCat("Failed to read sensor value with error: Invalid "
"response body: ",
json_string));
return;
}
std::optional<double> sensor_value =
milotic::authz::GetValueAsDoubleFromFloatOrInteger(sensor_json,
"Reading");
// Readings from GPU sensors can be null.
if (!sensor_value.has_value()) {
UpdateStateAndTimestamp(
STATUS_STALE, "Failed to read sensor value with error: Null value");
return;
}
SensorValue sensor_data;
// milliseconds to timestamp
*sensor_data.mutable_timestamp() = Now();
sensor_data.set_reading(sensor_value.value());
DLOG(INFO) << "RedfishAggregatedSensor::RefreshOnceAsync success: "
<< sensor_name_ << " " << sensor_data.reading();
StoreSensorData(std::make_shared<const SensorValue>(std::move(sensor_data)));
UpdateStateAndTimestamp(STATUS_READY, "Successfully read sensor value");
}
void RedfishAggregatedSensor::RefreshOnceAsync(
absl::AnyInvocable<void(const std::shared_ptr<const SensorValue>&)>
callback) {
std::weak_ptr<RedfishAggregatedSensor> self = shared_from_this();
callback_ = std::move(callback);
boost::asio::post(*io_context_, [self(std::move(self))]() mutable {
std::shared_ptr<RedfishAggregatedSensor> sensor = self.lock();
if (!sensor) {
LOG(WARNING) << "Sensor is destroyed; cancel the refresh.";
return;
}
std::chrono::steady_clock::time_point last_refresh_start_time =
sensor->GetLastRefreshStartTime();
if (last_refresh_start_time !=
std::chrono::steady_clock::time_point::min()) {
sensor->UpdateSoftwarePollingStartMetrics(
std::chrono::steady_clock::now() - last_refresh_start_time);
}
sensor->SetLastRefreshStartTime(std::chrono::steady_clock::now());
// Get the chassis name from the board config key by removing the aggregated
// prefix. The aggregated prefix is the common prefix for all aggregated
// Chassis but if we use go through HMC, we need to remove the prefix.
std::string chassis_name =
sensor->config_.entity_common_config().board_config_key().substr(
sensor->config_.aggregation_prefix().size());
std::string target = absl::StrCat("/redfish/v1/Chassis/", chassis_name,
"/Sensors/", sensor->config_.name());
absl::string_view host = sensor->config_.source(); // HMC_0
absl::string_view port = "80";
// We set the 'sensor->aggregated_polling_start_time_' to
// 'time_point::min()' after every update so this value does not get
// overwritten by next RefreshOnceAsync call while the async http request is
// in progress.
if (sensor->aggregated_polling_start_time_ ==
std::chrono::steady_clock::time_point::min()) {
sensor->aggregated_polling_start_time_ = std::chrono::steady_clock::now();
}
std::weak_ptr<RedfishAggregatedSensor> weak_sensor = sensor;
// Send an async request to the HMC.
absl::Status request_status = sensor->http_client_->SendRequest(
*(sensor->io_context_), host, port, target,
[weak_sensor](
boost::beast::error_code ec,
boost::beast::http::response<boost::beast::http::string_body> res) {
std::shared_ptr<RedfishAggregatedSensor> locked_weak_sensor =
weak_sensor.lock();
if (!locked_weak_sensor) {
LOG(WARNING)
<< "Sensor destroyed before HandleScanCompletion could run.";
return;
}
locked_weak_sensor->HandleScanCompletion(ec, res);
});
if (!request_status.ok()) {
sensor->UpdateAggregatedPollingMetrics(
std::chrono::steady_clock::now() -
sensor->aggregated_polling_start_time_);
sensor->aggregated_polling_start_time_ =
std::chrono::steady_clock::time_point::min();
sensor->UpdateStateAndTimestamp(
STATUS_STALE, absl::StrCat("Failed to read sensor value with error: ",
request_status.message()));
DLOG(INFO) << "RedfishAggregatedSensor::RefreshOnceAsync failed: "
<< sensor->sensor_name_;
}
});
}
google::protobuf::Duration GetDurationFromMilliseconds(uint64_t milliseconds) {
google::protobuf::Duration duration;
duration.set_seconds(static_cast<int64_t>(milliseconds / 1000));
duration.set_nanos(static_cast<int32_t>(milliseconds % 1000) * 1000000);
return duration;
}
void RedfishAggregatedSensor::UpdateAggregatedPollingMetrics(
std::chrono::steady_clock::duration interval) {
// NOLINTNEXTLINE: Yocto's Abseil doesn't support non-pointer constructor.
absl::MutexLock lock(sensor_metrics_mutex_);
// At least increase the interval by 1ms to avoid total interval being zero.
if (interval < std::chrono::milliseconds(1)) {
interval = std::chrono::milliseconds(1);
}
total_aggregated_polling_interval_ms_ +=
static_cast<uint64_t>(interval.count() / 1000000);
total_aggregated_read_cnt_++;
}
SensorMetrics RedfishAggregatedSensor::GetSensorMetrics() const {
// NOLINTNEXTLINE: Yocto's Abseil doesn't support non-pointer constructor.
absl::MutexLock lock(sensor_metrics_mutex_);
SensorMetrics metrics;
// Check for aggregated read count instead of hardware read count because
// hardware read count does not apply to aggregated sensors.
if (total_aggregated_read_cnt_ == 0 ||
total_software_polling_start_cnt_ == 0 ||
total_software_polling_end_cnt_ == 0) {
metrics.mutable_average_hardware_polling_latency()->set_nanos(0);
metrics.mutable_average_software_polling_start_interval()->set_nanos(0);
metrics.mutable_average_software_polling_end_interval()->set_nanos(0);
return metrics;
}
// Always set average_hardware_polling_latency to 0 for aggregated sensors.
*metrics.mutable_average_hardware_polling_latency() =
GetDurationFromMilliseconds(0);
uint64_t average_software_polling_start_interval_ms =
total_software_polling_start_interval_ms_ /
total_software_polling_start_cnt_;
*metrics.mutable_average_software_polling_start_interval() =
GetDurationFromMilliseconds(average_software_polling_start_interval_ms);
uint64_t average_software_polling_end_interval_ms =
total_software_polling_end_interval_ms_ / total_software_polling_end_cnt_;
*metrics.mutable_average_software_polling_end_interval() =
GetDurationFromMilliseconds(average_software_polling_end_interval_ms);
return metrics;
}
void RedfishAggregatedSensor::ResetMetrics() {
Sensor::ResetMetrics();
// NOLINTNEXTLINE: Yocto's Abseil doesn't support non-pointer constructor.
absl::MutexLock lock(sensor_metrics_mutex_);
total_aggregated_polling_interval_ms_ = 0;
total_aggregated_read_cnt_ = 0;
}
absl::StatusOr<nlohmann::json> RedfishAggregatedSensor::GetMetricsAsJson(
const ::google::protobuf::util::JsonPrintOptions& options) const {
std::string base_metrics_json;
ECCLESIA_RETURN_IF_ERROR(::google::protobuf::json::MessageToJsonString(
GetSensorMetrics(), &base_metrics_json, options));
nlohmann::json metrics_json =
nlohmann::json::parse(base_metrics_json, nullptr, false);
// Remove the average_hardware_polling_latency metric from the metrics json
// because it is not applicable to the aggregated sensors.
metrics_json.erase("average_hardware_polling_latency");
// Calculate the average aggregated polling interval and add it to the metrics
// json.
uint64_t average_aggregated_polling_interval_ms = 0;
{
// NOLINTNEXTLINE: Yocto's Abseil doesn't support non-pointer constructor.
absl::MutexLock lock(sensor_metrics_mutex_);
if (total_aggregated_read_cnt_ > 0 && total_software_polling_end_cnt_ > 0 &&
total_software_polling_start_cnt_ > 0) {
average_aggregated_polling_interval_ms =
total_aggregated_polling_interval_ms_ / total_aggregated_read_cnt_;
}
}
google::protobuf::Duration duration =
GetDurationFromMilliseconds(average_aggregated_polling_interval_ms);
std::string duration_json;
if (!::google::protobuf::json::MessageToJsonString(duration, &duration_json, options)
.ok()) {
return absl::InternalError("Failed to serialize Duration proto.");
}
metrics_json["average_aggregated_polling_time"] =
nlohmann::json::parse(duration_json);
return metrics_json;
}
} // namespace milotic_tlbmc