blob: 8b80dd7df59fe1e4581af983fcc374bef0b628a9 [file] [log] [blame]
#include "tlbmc/sensors/intel_cpu_sensor.h"
#include <algorithm>
#include <array>
#include <cctype>
#include <charconv>
#include <chrono> // NOLINT: chrono is commonly used in BMC
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iterator>
#include <memory>
#include <optional>
#include <string>
#include <system_error> // NOLINT: system_error is commonly used in BMC
#include <tuple>
#include <utility>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/functional/any_invocable.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "g3/macros.h"
#include "entity_common_config.pb.h"
#include "intel_cpu_sensor_config.pb.h"
#include "reading_range_config.pb.h"
#include "reading_transform_config.pb.h"
#include "threshold_config.pb.h"
#include "tlbmc/hal/sysfs/peci.h"
#include "resource.pb.h"
#include "sensor.pb.h"
#include "tlbmc/sensors/peci_hwmon_based_sensor.h"
#include "tlbmc/sensors/sensor.h"
#include "tlbmc/time/time.h"
#include "re2/re2.h"
namespace milotic_tlbmc {
// Ordering functor for (type name, payload) pairs. Only the first element of
// each pair (the type name) takes part in the comparison; the second element
// is ignored, so two pairs with equal first elements compare as equivalent.
template <class TypeNameFirst, class TypeNameSecond>
struct TypeComparator {
  bool operator()(const std::pair<TypeNameFirst, TypeNameSecond>& lhs,
                  const std::pair<TypeNameFirst, TypeNameSecond>& rhs) const {
    const TypeNameFirst& lhs_type = std::get<0>(lhs);
    const TypeNameFirst& rhs_type = std::get<0>(rhs);
    return lhs_type < rhs_type;
  }
};
// Default reading properties for IntelCpuSensors, statically configured by
// sensor type rather than read from the hardware.
//
// The element order is significant: CreateInitialSensors selects index 0 for
// sensors whose name contains "power" and index 1 for everything else, so
// entries must not be reordered without updating that lookup.
// From:
// https://github.com/openbmc/dbus-sensors/blob/master/src/intel-cpu/IntelCPUSensor.cpp#L72
constexpr std::array<IntelCpuSensor::ReadingProperties, 2>
kDefaultIntelCpuSensorProperties = {
// Index 0: power sensor, reported in watts with a reading range of [0, 511].
IntelCpuSensor::ReadingProperties{.max_reading = 511,
.min_reading = 0,
.sensor_unit = SensorUnit::UNIT_WATT},
// Index 1: temperature sensor, reported in degrees Celsius with a reading
// range of [-128, 127].
IntelCpuSensor::ReadingProperties{
.max_reading = 127,
.min_reading = -128,
.sensor_unit = SensorUnit::UNIT_DEGREE_CELSIUS},
};
// Maps the suffix of a hwmon input file name (the text after the first
// underscore, as produced by SplitInputFile) to the threshold attributes that
// may accompany that input. Each entry pairs an attribute suffix with the
// threshold type it populates; ParseThresholdsFromHwmonFiles looks for a file
// named "<prefix><number>_<attribute>" next to the input file for every entry.
// From:
// https://github.com/openbmc/dbus-sensors/blob/master/src/Thresholds.cpp#L492
static const absl::flat_hash_map<
std::string, std::vector<std::tuple<std::string, ThresholdType>>>
kThresholdAttributesMap = { // NOLINT(google3-runtime-global-variables)
// "*_average" inputs only have non-critical bounds.
{"average",
{std::make_tuple("average_min", THRESHOLD_TYPE_LOWER_NON_CRITICAL),
std::make_tuple("average_max", THRESHOLD_TYPE_UPPER_NON_CRITICAL)}},
// "*_input" inputs have both non-critical and critical bounds.
{"input",
{std::make_tuple("min", THRESHOLD_TYPE_LOWER_NON_CRITICAL),
std::make_tuple("max", THRESHOLD_TYPE_UPPER_NON_CRITICAL),
std::make_tuple("lcrit", THRESHOLD_TYPE_LOWER_CRITICAL),
std::make_tuple("crit", THRESHOLD_TYPE_UPPER_CRITICAL)}}};
// Builds the reading range configuration for a sensor from its static reading
// properties. The result holds exactly two entries, in this order: the maximum
// reading followed by the minimum reading.
ReadingRangeConfigs GetReadingRangeConfig(
    const IntelCpuSensor::ReadingProperties& reading_properties) {
  ReadingRangeConfigs ranges;

  ReadingRangeConfig& upper_bound = *ranges.add_reading_range_configs();
  upper_bound.set_type(READING_RANGE_TYPE_MAX);
  upper_bound.set_reading(reading_properties.max_reading);

  ReadingRangeConfig& lower_bound = *ranges.add_reading_range_configs();
  lower_bound.set_type(READING_RANGE_TYPE_MIN);
  lower_bound.set_reading(reading_properties.min_reading);

  return ranges;
}
// Walks the directory tree below `root_dir` (following directory symlinks) and
// returns every visited path whose full path string partially matches `regex`.
//
// `max_depth` bounds the walk: an entry at depth `max_depth` or deeper is
// still tested against `regex`, but the walk does not descend into it. With
// `max_depth` of 0 only the direct entries of `root_dir` are visited.
//
// Returns an empty vector (and logs a warning) when `root_dir` does not exist.
std::vector<boost::filesystem::path> IntelCpuSensor::FindFiles(
    const boost::filesystem::path& root_dir, const RE2& regex, int max_depth) {
  namespace fs = boost::filesystem;

  std::vector<fs::path> matches;
  if (!fs::exists(root_dir)) {
    LOG(WARNING) << "root_dir does not exist: " << root_dir;
    return matches;
  }

  const fs::recursive_directory_iterator end_of_walk;
  fs::recursive_directory_iterator walker(
      root_dir, fs::directory_options::follow_directory_symlink);
  while (walker != end_of_walk) {
    const fs::path& current = walker->path();
    if (RE2::PartialMatch(current.string(), regex)) {
      matches.push_back(current);
    }
    // Stop descending once the depth limit is reached; siblings at this level
    // are still visited.
    if (walker.depth() >= max_depth) {
      walker.disable_recursion_pending();
    }
    ++walker;
  }
  return matches;
}
// Derives the published sensor name from a hwmon label (or configured name)
// and the CPU id.
//
// Steps, in order:
//   1. Labels that do not contain "DIMM" get "_CPU<cpu_id>" appended.
//   2. The first character of the name and the first character after every
//      '_' are upper-cased; all other characters are left as they are.
//   3. The name is prefixed with "power_" when the original label contains
//      "power" (case-insensitive), otherwise with "temperature_".
//   4. Every space is replaced by '_'.
//
// Capitalization scheme from:
// https://github.com/openbmc/dbus-sensors/blob/master/src/intel-cpu/IntelCPUSensorMain.cpp#L138
std::string IntelCpuSensor::CreateSensorName(absl::string_view label,
                                             uint32_t cpu_id) {
  std::string sensor_name(label);

  // The prefix is decided from the label alone, before the CPU suffix is
  // appended. Only these two categories exist for Intel CPU sensors, so no
  // further unit needs to be handled.
  const bool is_power = absl::StrContainsIgnoreCase(sensor_name, "power");

  if (!absl::StrContains(sensor_name, "DIMM")) {
    absl::StrAppend(&sensor_name, "_CPU", cpu_id);
  }

  bool at_word_start = true;
  for (char& ch : sensor_name) {
    if (ch == '_') {
      at_word_start = true;
    } else if (at_word_start) {
      at_word_start = false;
      // std::toupper has undefined behavior for negative arguments other than
      // EOF, so the (possibly signed) char is widened through unsigned char.
      ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
    }
  }

  sensor_name.insert(0, is_power ? "power_" : "temperature_");
  return absl::StrReplaceAll(sensor_name, {{" ", "_"}});
}
// Splits the file name of a hwmon input file into three parts:
//   - prefix: everything before the first digit,
//   - number: the text from the first digit up to the first underscore,
//   - suffix: everything after the first underscore.
// For example "temp1_input" yields ("temp", "1", "input").
//
// Returns InvalidArgumentError when the name has no digit, has no underscore,
// or when the first underscore does not come after the first digit.
absl::StatusOr<std::tuple<std::string, std::string, std::string>>
IntelCpuSensor::SplitInputFile(const boost::filesystem::path& input_file_path) {
  const std::string file_name = input_file_path.filename().string();
  const size_t digit_pos = file_name.find_first_of("1234567890");
  const size_t underscore_pos = file_name.find_first_of('_');

  const bool has_both_markers = digit_pos != std::string::npos &&
                                underscore_pos != std::string::npos;
  if (!has_both_markers || underscore_pos <= digit_pos) {
    return absl::InvalidArgumentError(
        absl::Substitute("Invalid input file: $0", file_name));
  }

  std::string prefix = file_name.substr(0, digit_pos);
  std::string number = file_name.substr(digit_pos, underscore_pos - digit_pos);
  std::string suffix = file_name.substr(underscore_pos + 1);
  return std::make_tuple(std::move(prefix), std::move(number),
                         std::move(suffix));
}
// Completion handler for a read of the sensor's input device.
//
// On a read error, or when the bytes read do not parse as an integer, the
// sensor state becomes STATUS_STALE with an explanatory message and no reading
// is stored. Otherwise the thresholds are re-parsed from the hwmon files, the
// raw integer is converted with the configured transform
// (raw / scale + offset), stored with the current timestamp, and the sensor
// state becomes STATUS_READY.
void IntelCpuSensor::HandleRefreshResult(const boost::system::error_code& error,
                                         size_t bytes_read) {
  // Publishes a STATUS_STALE state carrying `message`.
  const auto mark_stale = [this](std::string message) {
    State stale_state;
    stale_state.set_status(STATUS_STALE);
    stale_state.set_status_message(std::move(message));
    UpdateState(std::move(stale_state));
  };

  if (error) {
    mark_stale(absl::Substitute(
        "Failed to read from input device: $0; input device path: $1",
        error.message(), GetInputDevicePath()));
    return;
  }

  // Only the first `bytes_read` bytes of the read buffer are valid.
  const char* const data_begin = GetConstReadBuffer().data();
  const char* const data_end = data_begin + bytes_read;
  int64_t raw_reading = 0;
  const std::from_chars_result parsed =
      std::from_chars(data_begin, data_end, raw_reading);
  if (parsed.ec != std::errc()) {
    mark_stale(absl::StrCat("Read data can't be converted to a number: ",
                            std::make_error_condition(parsed.ec).message()));
    return;
  }

  // TODO(b/449557765): Parse thresholds when tControl value changes instead of
  // on every refresh.
  ParseThresholdsFromHwmonFiles();

  SensorValue sample;
  *sample.mutable_timestamp() = Now();
  const auto& transform = sensor_attributes_static_.reading_transform();
  sample.set_reading((static_cast<double>(raw_reading) / transform.scale()) +
                     transform.offset());
  StoreSensorData(std::make_shared<const SensorValue>(std::move(sample)));

  State ready_state;
  ready_state.set_status(STATUS_READY);
  UpdateState(std::move(ready_state));
}
// Returns the related item for the sensor called `sensor_name`.
//
// An entry for `sensor_name` in the config's name_to_related_item map wins.
// Without one, the sensor is related to the processor "cpu<cpu_id>".
RelatedItem IntelCpuSensor::CreateRelatedItem(
    absl::string_view sensor_name, const IntelCpuSensorConfig& sensor_config) {
  const auto& configured_items = sensor_config.name_to_related_item();
  if (const auto match = configured_items.find(sensor_name);
      match != configured_items.end()) {
    return match->second;
  }

  RelatedItem processor_item;
  processor_item.set_type(RESOURCE_TYPE_PROCESSOR);
  processor_item.set_id(absl::StrCat("cpu", sensor_config.cpu_id()));
  return processor_item;
}
// Creates one placeholder sensor for every entry of the config's label_to_name
// map. The sensors are created with an empty input device path and the state
// STATUS_CREATION_PENDING; they carry the hwmon label they should later be
// bound to.
//
// For each entry the configured name is used as the base name, falling back to
// the label when the name is empty. The base name decides the reading
// properties (power vs. temperature), the published sensor name and the
// related item.
absl::StatusOr<std::vector<std::shared_ptr<Sensor>>>
IntelCpuSensor::CreateInitialSensors(
    const IntelCpuSensorConfig& sensor_config,
    const std::shared_ptr<boost::asio::io_context>& io_context,
    const PeciSysfs& peci_sysfs,
    std::optional<NotificationCb> on_batch_notify) {
  std::vector<std::shared_ptr<Sensor>> created;
  for (const auto& entry : sensor_config.label_to_name()) {
    const std::string& hwmon_label = entry.first;
    const std::string& configured_name = entry.second;
    const std::string base_name =
        configured_name.empty() ? hwmon_label : configured_name;

    // Index 0 holds the power properties, index 1 the temperature properties.
    const std::size_t properties_index =
        absl::StrContainsIgnoreCase(base_name, "power") ? 0 : 1;
    const IntelCpuSensor::ReadingProperties reading_properties =
        kDefaultIntelCpuSensorProperties[properties_index];

    const std::string sensor_name =
        CreateSensorName(base_name, sensor_config.cpu_id());

    ReadingTransformConfig transform;
    transform.set_scale(IntelCpuSensor::kScaleFactor);

    // The entity common config is copied per sensor because every sensor gets
    // its own related item.
    EntityCommonConfig entity_config = sensor_config.entity_common_config();
    *entity_config.mutable_related_item() =
        CreateRelatedItem(base_name, sensor_config);

    auto sensor = std::make_shared<IntelCpuSensor>(
        Token(), sensor_config.type(), reading_properties.sensor_unit, "",
        sensor_name, hwmon_label, sensor_config.hal_common_config(),
        sensor_config.thresholds(), GetReadingRangeConfig(reading_properties),
        transform, entity_config, sensor_config.dts_offset(), io_context,
        on_batch_notify, peci_sysfs);

    State pending_state;
    pending_state.set_status(STATUS_CREATION_PENDING);
    pending_state.set_status_message(
        "Initial creation, CPU/DIMM not detected yet");
    sensor->UpdateState(std::move(pending_state));

    LOG(INFO) << "Created IntelCpuSensor: " << sensor_name;
    created.push_back(std::move(sensor));
  }
  return created;
}
// Scans the hwmon directories of the PECI device described by `hal_config` and
// returns a map from sensor label (contents of "<prefix><number>_label" with
// trailing whitespace removed) to the path of the matching input file.
//
// Labels listed in kHiddenProperties are left out. When several input files
// resolve to the same label, the one visited last wins. An empty map is
// returned (and an error logged) when no hwmon "name" file is found.
//
// `peci_device_path` is currently not consulted; the search root is derived
// from `hal_config` through `peci_sysfs_`.
absl::flat_hash_map<std::string, std::string>
IntelCpuSensor::CreateLabelToInputFileMap(
    const boost::filesystem::path& peci_device_path,
    const HalCommonConfig& hal_config) {
  absl::flat_hash_map<std::string, std::string> label_to_input_file;
  // Expected directory structure is
  // /sys/bus/peci/devices/peci-$BUS/$BUS-$ADDRESS/peci-*.0/hwmon/hwmon*/name
  // So we search with max depth of 4 for potential name files.
  // From:
  // https://github.com/openbmc/dbus-sensors/blob/master/src/intel-cpu/IntelCPUSensorMain.cpp#L195
  const std::vector<boost::filesystem::path> hwmon_name_paths = FindFiles(
      peci_sysfs_.GetBusPath(hal_config.bus()) /
          peci_sysfs_.GetDeviceDirectoryName(hal_config),
      RE2(absl::Substitute(IntelCpuSensor::kHwmonNameRegex, hal_config.bus(),
                           absl::Hex(hal_config.address()))),
      4);
  if (hwmon_name_paths.empty()) {
    LOG(ERROR) << "No hwmon paths found for CPU sensors in system";
    return label_to_input_file;
  }

  absl::flat_hash_set<std::string> scanned_directories;
  for (const boost::filesystem::path& hwmon_name_path : hwmon_name_paths) {
    const boost::filesystem::path hwmon_dir = hwmon_name_path.parent_path();
    if (!scanned_directories.insert(hwmon_dir.string()).second) {
      continue;  // already processed this directory
    }

    // A hwmon directory is only considered when its name file is readable and
    // non-empty; the name itself is not used any further.
    std::ifstream name_file(hwmon_name_path.string());
    if (!name_file.good()) {
      LOG(WARNING) << "Failed to open file: " << hwmon_name_path.string();
      continue;
    }
    const std::string name = {std::istreambuf_iterator<char>(name_file),
                              std::istreambuf_iterator<char>()};
    name_file.close();
    if (name.empty()) {
      continue;
    }

    // Depth 0: only files directly inside the hwmon directory.
    const std::vector<boost::filesystem::path> input_paths =
        FindFiles(hwmon_dir, *IntelCpuSensor::kInputRegex, 0);
    if (input_paths.empty()) {
      LOG(WARNING) << "No input files for sensors in system at " << hwmon_dir;
      continue;
    }

    for (const boost::filesystem::path& input_path : input_paths) {
      // Names without an underscore are skipped silently. SplitInputFile would
      // reject them as well, but that path logs a message.
      if (input_path.filename().string().find('_') == std::string::npos) {
        continue;
      }
      const absl::StatusOr<std::tuple<std::string, std::string, std::string>>
          input_file_parts = SplitInputFile(input_path);
      if (!input_file_parts.ok()) {
        LOG(INFO) << "Invalid input file: " << input_file_parts.status();
        continue;
      }
      const auto& [prefix, number, suffix] = *input_file_parts;

      const std::string label_file_path =
          (hwmon_dir / absl::StrCat(prefix, number, "_label")).string();
      std::ifstream label_file(label_file_path);
      if (!label_file.good()) {
        LOG(WARNING) << "Failed to open file: " << label_file_path;
        continue;
      }
      std::string label = {std::istreambuf_iterator<char>(label_file),
                           std::istreambuf_iterator<char>()};
      absl::StripTrailingAsciiWhitespace(&label);

      if (std::find(IntelCpuSensor::kHiddenProperties.begin(),
                    IntelCpuSensor::kHiddenProperties.end(),
                    label) != IntelCpuSensor::kHiddenProperties.end()) {
        continue;
      }
      label_to_input_file.insert_or_assign(std::move(label),
                                           input_path.string());
    }
  }
  return label_to_input_file;
}
void IntelCpuSensor::ParseThresholdsFromHwmonFiles() {
if (absl::StatusOr<std::tuple<std::string, std::string, std::string>>
input_file_parts = SplitInputFile(input_dev_path_);
input_file_parts.ok()) {
auto& [prefix, number, suffix] = *input_file_parts;
if (kThresholdAttributesMap.contains(suffix)) {
ThresholdConfigs new_threshold_configs;
bool has_valid_threshold = false;
for (const auto& [attribute, threshold_type] :
kThresholdAttributesMap.at(suffix)) {
ThresholdConfig* threshold_config =
new_threshold_configs.add_threshold_configs();
threshold_config->set_type(threshold_type);
boost::filesystem::path attribute_file_path =
boost::filesystem::path(input_dev_path_).parent_path() /
absl::StrCat(prefix, number, "_", attribute);
std::ifstream attr_file(attribute_file_path.string());
if (!attr_file.good()) {
LOG(INFO) << "Failed to open file: " << attribute_file_path;
continue;
}
std::string attr_value_str = {std::istreambuf_iterator<char>(attr_file),
std::istreambuf_iterator<char>()};
double value = 0;
if (!absl::SimpleAtod(attr_value_str, &value)) {
LOG(INFO) << "Failed to convert value: " << attr_value_str
<< " to double";
continue;
}
double offset = (attribute == "crit") ? dts_offset_ : 0;
value = value / IntelCpuSensor::kScaleFactor + offset;
threshold_config->set_value(value);
// If at least one of the thresholds has a valid value, we should update
// this sensor's thresholds.
has_valid_threshold = true;
LOG(INFO) << "IntelCPUSensor parsed threshold: " << attribute_file_path
<< " " << attribute << " " << threshold_type << " " << value;
}
if (has_valid_threshold) {
UpdateThresholds(new_threshold_configs);
}
}
}
}
// Performs a single attempt to bind this sensor to its hwmon input file.
//
// Returns:
//   - FailedPreconditionError when the input device is already usable,
//   - the error from PeciHwmonBasedSensor::CreateDevice when that fails,
//   - NotFoundError when no input file carries this sensor's label,
//   - OkStatus once the input path is set, thresholds are parsed, the state is
//     reset to STATUS_UNKNOWN and the input has been set up for reading.
absl::Status IntelCpuSensor::ReinitializeInternal() {
  if (IsInputDeviceUsable()) {
    return absl::FailedPreconditionError(
        absl::StrCat("Attempted to reinitialize usable sensor: ", sensor_name_,
                     ". Ignoring reinitialization."));
  }

  const auto& hal_config = sensor_attributes_static_.hal_common_config();
  ECCLESIA_ASSIGN_OR_RETURN(boost::filesystem::path peci_device_path,
                            PeciHwmonBasedSensor::CreateDevice(hal_config));

  absl::flat_hash_map<std::string, std::string> label_to_input_file =
      CreateLabelToInputFileMap(peci_device_path, hal_config);
  const auto found = label_to_input_file.find(sensor_label_);
  if (found == label_to_input_file.end()) {
    // The sensor state is left untouched here; ReinitializeWithRetry marks the
    // sensor as CREATION_FAILED once its retries are used up.
    const std::string error_message =
        absl::StrCat("Failed to find ", sensor_label_, " in hwmon folder ",
                     peci_device_path.string());
    LOG(WARNING) << error_message;
    return absl::NotFoundError(error_message);
  }

  SetInputDevicePath(found->second);
  ParseThresholdsFromHwmonFiles();

  // Status UNKNOWN mimics a freshly created Sensor object.
  State fresh_state;
  fresh_state.set_status(STATUS_UNKNOWN);
  fresh_state.set_status_message("Reinitialized, pending sensor refresh");
  UpdateState(std::move(fresh_state));

  // Setting up the input must stay the last step: once it is done the sensor
  // is readable.
  SetUpInput();
  LOG(INFO) << "Successfully reinitialized IntelCpuSensor: " << sensor_name_;
  return absl::OkStatus();
}
// Attempts to reinitialize the sensor and, on failure, schedules another
// attempt on `retry_timer_` until `attempts_left` reaches zero.
//
// `callback` is invoked exactly once: with OkStatus on success, with the last
// failure status once no attempts are left (the sensor state is then set to
// STATUS_CREATION_FAILED), or with InternalError when the retry timer reports
// an error.
//
// The delay before the next attempt is taken from `retry_delays_`, counted
// from the end: `attempts_left` == N uses the N-th entry from the back.
//
// NOTE(review): the timer handler captures `this`; this assumes the sensor
// outlives any pending retry timer — confirm against the owner's lifetime.
void IntelCpuSensor::ReinitializeWithRetry(
    absl::AnyInvocable<void(absl::Status)> callback, uint32_t attempts_left) {
  absl::Status status = ReinitializeInternal();
  if (status.ok()) {
    callback(status);
    return;
  }

  // `attempts_left` is unsigned, so a plain "- 1" would wrap around to
  // 4294967295 in the log line of the final attempt.
  const uint32_t attempts_after_this =
      attempts_left > 0 ? attempts_left - 1 : 0;
  LOG(WARNING) << "Failed to reinitialize IntelCpuSensor: " << sensor_name_
               << " with status: " << status
               << ". Attempts left: " << attempts_after_this;

  if (attempts_left < 1) {
    State state;
    state.set_status(STATUS_CREATION_FAILED);
    state.set_status_message(
        absl::StrCat("Failed to reinitialize IntelCpuSensor after 3 attempts. "
                     "Last status: ",
                     status.message()));
    UpdateState(std::move(state));
    callback(status);
    return;
  }

  // Guard the lookup: a delay schedule shorter than the number of remaining
  // attempts falls back to its first entry, and an empty schedule retries
  // without delay, instead of indexing out of bounds.
  std::chrono::seconds delay(0);
  if (!retry_delays_.empty()) {
    const size_t schedule_size = retry_delays_.size();
    const size_t delay_index =
        attempts_left <= schedule_size ? schedule_size - attempts_left : 0;
    delay = std::chrono::seconds(retry_delays_[delay_index]);
  }

  retry_timer_.expires_after(delay);
  retry_timer_.async_wait([this, callback = std::move(callback), attempts_left](
                              const boost::system::error_code& ec) mutable {
    if (ec) {
      LOG(ERROR) << "Retry timer error: " << ec.message();
      callback(absl::InternalError(absl::StrCat(
          "Retry timer error while reinitializing IntelCpuSensor: ",
          ec.message())));
      return;
    }
    ReinitializeWithRetry(std::move(callback), attempts_left - 1);
  });
}
// Starts an asynchronous reinitialization of this sensor. The work is posted to
// the sensor's io_context and begins with a retry budget of 3; `callback`
// receives the final status from ReinitializeWithRetry.
void IntelCpuSensor::Reinitialize(
    absl::AnyInvocable<void(absl::Status)> callback) {
  auto start_retries = [this, on_done = std::move(callback)]() mutable {
    ReinitializeWithRetry(std::move(on_done), 3);
  };
  boost::asio::post(*io_context_, std::move(start_retries));
}
// Constructs a fully configured Intel CPU sensor.
//
// The static sensor attributes handed to the PeciHwmonBasedSensor base are
// assembled from `sensor_name`, `sensor_unit`, `hal_common_config`,
// `entity_common_config`, `reading_range_configs` and
// `reading_transform_config`. `input_dev_path` may be empty (as it is for
// sensors made by CreateInitialSensors). `dts_offset` is stored and later
// added to the parsed "crit" threshold. The retry timer is bound to
// `io_context`.
//
// `token` is not referenced by this constructor; presumably it is a pass-key
// that limits who may construct the sensor — confirm against the header.
IntelCpuSensor::IntelCpuSensor(
Token token, IntelCpuSensorType sensor_type, SensorUnit sensor_unit,
const std::string& input_dev_path, const std::string& sensor_name,
const std::string& sensor_label, const HalCommonConfig& hal_common_config,
const ThresholdConfigs& threshold_configs,
const ReadingRangeConfigs& reading_range_configs,
const ReadingTransformConfig& reading_transform_config,
const EntityCommonConfig& entity_common_config, const double dts_offset,
const std::shared_ptr<boost::asio::io_context>& io_context,
std::optional<NotificationCb> on_batch_notify, const PeciSysfs& peci_sysfs)
: PeciHwmonBasedSensor(
input_dev_path, io_context,
CreateStaticAttributes(sensor_name, sensor_unit, hal_common_config,
entity_common_config, reading_range_configs,
reading_transform_config),
threshold_configs, std::move(on_batch_notify), peci_sysfs),
sensor_type_(sensor_type),
sensor_name_(sensor_name),
sensor_label_(sensor_label),
dts_offset_(dts_offset),
retry_timer_(*io_context) {}
// Constructs a minimally configured sensor with a caller-supplied retry delay
// schedule.
//
// The base is initialized with an empty input device path, an empty sensor
// name, SensorUnit::UNIT_UNKNOWN, default-constructed entity, reading range,
// reading transform and threshold configs, and no batch notification callback;
// only the HAL common config is taken from `config`. `sensor_type_`,
// `sensor_name_` and `dts_offset_` are not set in this initializer list.
//
// NOTE(review): this looks like a constructor meant for tests that need to
// control `retry_delays` — confirm against the header.
IntelCpuSensor::IntelCpuSensor(
const PeciSysfs& peci_sysfs, const IntelCpuSensorConfig& config,
const std::string& sensor_label,
const std::shared_ptr<boost::asio::io_context>& io_context,
const std::vector<uint32_t>& retry_delays)
: PeciHwmonBasedSensor("", io_context,
CreateStaticAttributes(
"", SensorUnit::UNIT_UNKNOWN,
config.hal_common_config(), EntityCommonConfig(),
ReadingRangeConfigs(), ReadingTransformConfig()),
ThresholdConfigs(), std::nullopt, peci_sysfs),
retry_delays_(retry_delays),
sensor_label_(sensor_label),
retry_timer_(*io_context) {}
} // namespace milotic_tlbmc