| #include "tlbmc/sensors/intel_cpu_sensor.h" |
| |
| #include <algorithm> |
| #include <array> |
| #include <cctype> |
| #include <charconv> |
| #include <chrono> // NOLINT: chrono is commonly used in BMC |
| #include <cstdint> |
| #include <cstring> |
| #include <fstream> |
| #include <iterator> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <system_error> // NOLINT: system_error is commonly used in BMC |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| #include "absl/container/flat_hash_map.h" |
| #include "absl/container/flat_hash_set.h" |
| #include "absl/functional/any_invocable.h" |
| #include "absl/log/log.h" |
| #include "absl/status/status.h" |
| #include "absl/strings/ascii.h" |
| #include "absl/strings/match.h" |
| #include "absl/strings/numbers.h" |
| #include "absl/strings/str_cat.h" |
| #include "absl/strings/str_replace.h" |
| #include "absl/strings/string_view.h" |
| #include "absl/strings/substitute.h" |
| #include "g3/macros.h" |
| #include "entity_common_config.pb.h" |
| #include "intel_cpu_sensor_config.pb.h" |
| #include "reading_range_config.pb.h" |
| #include "reading_transform_config.pb.h" |
| #include "threshold_config.pb.h" |
| #include "tlbmc/hal/sysfs/peci.h" |
| #include "resource.pb.h" |
| #include "sensor.pb.h" |
| #include "tlbmc/sensors/peci_hwmon_based_sensor.h" |
| #include "tlbmc/sensors/sensor.h" |
| #include "tlbmc/time/time.h" |
| #include "re2/re2.h" |
| |
| namespace milotic_tlbmc { |
| |
// Ordering functor for (type name, payload) pairs. Only the first element of
// each pair takes part in the comparison; the second element is ignored.
template <class TypeNameFirst, class TypeNameSecond>
struct TypeComparator {
  using Entry = std::pair<TypeNameFirst, TypeNameSecond>;

  // Returns true when the type name of `lhs` orders before that of `rhs`.
  bool operator()(const Entry& lhs, const Entry& rhs) const {
    const TypeNameFirst& left_key = lhs.first;
    const TypeNameFirst& right_key = rhs.first;
    return left_key < right_key;
  }
};
| |
| // Reading properties for IntelCpuSensors are statically configured by type. |
| // From: |
| // https://github.com/openbmc/dbus-sensors/blob/master/src/intel-cpu/IntelCPUSensor.cpp#L72 |
| constexpr std::array<IntelCpuSensor::ReadingProperties, 2> |
| kDefaultIntelCpuSensorProperties = { |
| // Power sensor |
| IntelCpuSensor::ReadingProperties{.max_reading = 511, |
| .min_reading = 0, |
| .sensor_unit = SensorUnit::UNIT_WATT}, |
| // Temperature sensor |
| IntelCpuSensor::ReadingProperties{ |
| .max_reading = 127, |
| .min_reading = -128, |
| .sensor_unit = SensorUnit::UNIT_DEGREE_CELSIUS}, |
| }; |
| |
| // From: |
| // https://github.com/openbmc/dbus-sensors/blob/master/src/Thresholds.cpp#L492 |
| static const absl::flat_hash_map< |
| std::string, std::vector<std::tuple<std::string, ThresholdType>>> |
| kThresholdAttributesMap = { // NOLINT(google3-runtime-global-variables) |
| {"average", |
| {std::make_tuple("average_min", THRESHOLD_TYPE_LOWER_NON_CRITICAL), |
| std::make_tuple("average_max", THRESHOLD_TYPE_UPPER_NON_CRITICAL)}}, |
| {"input", |
| {std::make_tuple("min", THRESHOLD_TYPE_LOWER_NON_CRITICAL), |
| std::make_tuple("max", THRESHOLD_TYPE_UPPER_NON_CRITICAL), |
| std::make_tuple("lcrit", THRESHOLD_TYPE_LOWER_CRITICAL), |
| std::make_tuple("crit", THRESHOLD_TYPE_UPPER_CRITICAL)}}}; |
| |
| ReadingRangeConfigs GetReadingRangeConfig( |
| const IntelCpuSensor::ReadingProperties& reading_properties) { |
| ReadingRangeConfigs reading_range_configs; |
| ReadingRangeConfig* reading_range_max = |
| reading_range_configs.add_reading_range_configs(); |
| reading_range_max->set_type(READING_RANGE_TYPE_MAX); |
| reading_range_max->set_reading(reading_properties.max_reading); |
| ReadingRangeConfig* reading_range_min = |
| reading_range_configs.add_reading_range_configs(); |
| reading_range_min->set_type(READING_RANGE_TYPE_MIN); |
| reading_range_min->set_reading(reading_properties.min_reading); |
| return reading_range_configs; |
| } |
| |
| std::vector<boost::filesystem::path> IntelCpuSensor::FindFiles( |
| const boost::filesystem::path& root_dir, const RE2& regex, int max_depth) { |
| std::vector<boost::filesystem::path> files; |
| if (!boost::filesystem::exists(root_dir)) { |
| LOG(WARNING) << "root_dir does not exist: " << root_dir; |
| return files; |
| } |
| for (auto p = boost::filesystem::recursive_directory_iterator( |
| root_dir, |
| boost::filesystem::directory_options::follow_directory_symlink); |
| p != boost::filesystem::recursive_directory_iterator(); ++p) { |
| if (RE2::PartialMatch(p->path().string(), regex)) { |
| files.push_back(p->path()); |
| } |
| if (p.depth() >= max_depth) { |
| p.disable_recursion_pending(); |
| } |
| } |
| |
| return files; |
| } |
| |
| std::string IntelCpuSensor::CreateSensorName(absl::string_view label, |
| uint32_t cpu_id) { |
| std::string sensor_name = std::string(label); |
| SensorUnit sensor_unit = absl::StrContainsIgnoreCase(sensor_name, "power") |
| ? SensorUnit::UNIT_WATT |
| : SensorUnit::UNIT_DEGREE_CELSIUS; |
| |
| std::string cpu_name = absl::StrCat("CPU", cpu_id); |
| constexpr absl::string_view kDimmLabel = "DIMM"; |
| std::size_t dimm_found = sensor_name.find(kDimmLabel); |
| if (dimm_found == std::string::npos) { |
| absl::StrAppend(&sensor_name, "_", cpu_name); |
| } |
| |
| // Convert to Upper Camel case whole name |
| // From: |
| // https://github.com/openbmc/dbus-sensors/blob/master/src/intel-cpu/IntelCPUSensorMain.cpp#L138 |
| bool is_word_end = true; |
| std::transform(sensor_name.begin(), sensor_name.end(), sensor_name.begin(), |
| [&is_word_end](int c) { |
| if (c == '_') { |
| is_word_end = true; |
| } else { |
| if (is_word_end) { |
| is_word_end = false; |
| return std::toupper(c); |
| } |
| } |
| return c; |
| }); |
| |
| switch (sensor_unit) { |
| case SensorUnit::UNIT_WATT: |
| sensor_name = absl::StrCat("power_", sensor_name); |
| break; |
| case SensorUnit::UNIT_DEGREE_CELSIUS: |
| sensor_name = absl::StrCat("temperature_", sensor_name); |
| break; |
| default: |
| // IntelCpuSensors will never have these units |
| LOG(WARNING) << "IntelCpuSensor " << sensor_name |
| << " has unexpected unit: " << sensor_unit; |
| break; |
| } |
| |
| return absl::StrReplaceAll(sensor_name, {{" ", "_"}}); |
| } |
| |
| absl::StatusOr<std::tuple<std::string, std::string, std::string>> |
| IntelCpuSensor::SplitInputFile(const boost::filesystem::path& input_file_path) { |
| std::string input_file = input_file_path.filename().string(); |
| size_t find_num = input_file.find_first_of("1234567890"); |
| size_t find_underscore = input_file.find_first_of('_'); |
| if (find_num == std::string::npos || find_underscore == std::string::npos || |
| find_underscore <= find_num) { |
| return absl::InvalidArgumentError( |
| absl::Substitute("Invalid input file: $0", input_file)); |
| } |
| return std::make_tuple( |
| input_file.substr(0, find_num), |
| input_file.substr(find_num, find_underscore - find_num), |
| input_file.substr(find_underscore + 1)); |
| } |
| |
| void IntelCpuSensor::HandleRefreshResult(const boost::system::error_code& error, |
| size_t bytes_read) { |
| if (error) { |
| State state; |
| state.set_status(STATUS_STALE); |
| state.set_status_message(absl::Substitute( |
| "Failed to read from input device: $0; input device path: $1", |
| error.message(), GetInputDevicePath())); |
| UpdateState(std::move(state)); |
| return; |
| } |
| const char* buffer_end = GetConstReadBuffer().data() + bytes_read; |
| int64_t value = 0; |
| std::from_chars_result result = |
| std::from_chars(GetConstReadBuffer().data(), buffer_end, value); |
| if (result.ec != std::errc()) { |
| State state; |
| state.set_status(STATUS_STALE); |
| state.set_status_message( |
| absl::StrCat("Read data can't be converted to a number: ", |
| std::make_error_condition(result.ec).message())); |
| UpdateState(std::move(state)); |
| return; |
| } |
| // TODO(b/449557765): Parse thresholds when tControl value changes instead of |
| // on every refresh. |
| ParseThresholdsFromHwmonFiles(); |
| SensorValue sensor_data; |
| *sensor_data.mutable_timestamp() = Now(); |
| sensor_data.set_reading( |
| (static_cast<double>(value) / |
| sensor_attributes_static_.reading_transform().scale()) + |
| sensor_attributes_static_.reading_transform().offset()); |
| StoreSensorData(std::make_shared<const SensorValue>(std::move(sensor_data))); |
| State state; |
| state.set_status(STATUS_READY); |
| UpdateState(std::move(state)); |
| } |
| |
| RelatedItem IntelCpuSensor::CreateRelatedItem( |
| absl::string_view sensor_name, const IntelCpuSensorConfig& sensor_config) { |
| RelatedItem related_item; |
| auto it = sensor_config.name_to_related_item().find(sensor_name); |
| if (it != sensor_config.name_to_related_item().end()) { |
| related_item = it->second; |
| } else { |
| related_item.set_type(RESOURCE_TYPE_PROCESSOR); |
| related_item.set_id(absl::StrCat("cpu", sensor_config.cpu_id())); |
| } |
| return related_item; |
| } |
| |
| absl::StatusOr<std::vector<std::shared_ptr<Sensor>>> |
| IntelCpuSensor::CreateInitialSensors( |
| const IntelCpuSensorConfig& sensor_config, |
| const std::shared_ptr<boost::asio::io_context>& io_context, |
| const PeciSysfs& peci_sysfs, |
| std::optional<NotificationCb> on_batch_notify) { |
| std::vector<std::shared_ptr<Sensor>> sensors; |
| for (const auto& [label, name] : sensor_config.label_to_name()) { |
| std::string base_name = name.empty() ? label : name; |
| IntelCpuSensor::ReadingProperties reading_properties = |
| absl::StrContainsIgnoreCase(base_name, "power") |
| ? kDefaultIntelCpuSensorProperties[0] |
| : kDefaultIntelCpuSensorProperties[1]; |
| std::string sensor_name = |
| CreateSensorName(base_name, sensor_config.cpu_id()); |
| ReadingTransformConfig reading_transform_config; |
| reading_transform_config.set_scale(IntelCpuSensor::kScaleFactor); |
| |
| // Each sensor needs a unique entity common config since each will have |
| // their own related item. |
| EntityCommonConfig entity_common_config = |
| sensor_config.entity_common_config(); |
| *entity_common_config.mutable_related_item() = |
| CreateRelatedItem(base_name, sensor_config); |
| |
| std::shared_ptr<IntelCpuSensor> sensor = std::make_shared<IntelCpuSensor>( |
| Token(), sensor_config.type(), reading_properties.sensor_unit, "", |
| sensor_name, label, sensor_config.hal_common_config(), |
| sensor_config.thresholds(), GetReadingRangeConfig(reading_properties), |
| reading_transform_config, entity_common_config, |
| sensor_config.dts_offset(), io_context, on_batch_notify, peci_sysfs); |
| State state; |
| state.set_status(STATUS_CREATION_PENDING); |
| state.set_status_message("Initial creation, CPU/DIMM not detected yet"); |
| sensor->UpdateState(std::move(state)); |
| LOG(INFO) << "Created IntelCpuSensor: " << sensor_name; |
| sensors.push_back(sensor); |
| } |
| |
| return sensors; |
| } |
| |
| absl::flat_hash_map<std::string, std::string> |
| IntelCpuSensor::CreateLabelToInputFileMap( |
| const boost::filesystem::path& peci_device_path, |
| const HalCommonConfig& hal_config) { |
| absl::flat_hash_map<std::string, std::string> label_to_input_file; |
| |
| // Expected directory structure is |
| // /sys/bus/peci/devices/peci-$BUS/$BUS-$ADDRESS/peci-*.0/hwmon/hwmon*/name |
| // So we search with max depth of 4 for potential name files. |
| // From: |
| // https://github.com/openbmc/dbus-sensors/blob/master/src/intel-cpu/IntelCPUSensorMain.cpp#L195 |
| std::vector<boost::filesystem::path> hwmon_name_paths = FindFiles( |
| peci_sysfs_.GetBusPath(hal_config.bus()) / |
| peci_sysfs_.GetDeviceDirectoryName(hal_config), |
| RE2(absl::Substitute(IntelCpuSensor::kHwmonNameRegex, hal_config.bus(), |
| absl::Hex(hal_config.address()))), |
| 4); |
| if (hwmon_name_paths.empty()) { |
| LOG(ERROR) << "No hwmon paths found for CPU sensors in system"; |
| return label_to_input_file; |
| } |
| |
| absl::flat_hash_set<std::string> scanned_directories; |
| for (const boost::filesystem::path& hwmon_name_path : hwmon_name_paths) { |
| auto hwmon_dir = hwmon_name_path.parent_path(); |
| auto ret = scanned_directories.insert(hwmon_dir.string()); |
| if (!ret.second) { |
| continue; // already processed this directory |
| } |
| |
| std::ifstream name_file(hwmon_name_path.string()); |
| if (!name_file.good()) { |
| LOG(WARNING) << "Failed to open file: " << hwmon_name_path.string(); |
| continue; |
| } |
| std::string name = {std::istreambuf_iterator<char>(name_file), |
| std::istreambuf_iterator<char>()}; |
| name_file.close(); |
| if (name.empty()) { |
| continue; |
| } |
| |
| auto parent_dir = hwmon_name_path.parent_path(); |
| std::vector<boost::filesystem::path> input_paths = |
| FindFiles(parent_dir, *IntelCpuSensor::kInputRegex, 0); |
| if (input_paths.empty()) { |
| LOG(WARNING) << "No input files for sensors in system at " << parent_dir; |
| continue; |
| } |
| std::vector<std::shared_ptr<Sensor>> sensors; |
| for (const boost::filesystem::path& input_path : input_paths) { |
| auto find_underscore = input_path.filename().string().find('_'); |
| if (find_underscore == std::string::npos) { |
| continue; |
| } |
| absl::StatusOr<std::tuple<std::string, std::string, std::string>> |
| input_file_parts = SplitInputFile(input_path); |
| if (!input_file_parts.ok()) { |
| LOG(INFO) << "Invalid input file: " << input_file_parts.status(); |
| continue; |
| } |
| auto& [prefix, number, suffix] = *input_file_parts; |
| std::string label_file_path = |
| (parent_dir / absl::StrCat(prefix, number, "_label")).string(); |
| std::ifstream label_file(label_file_path); |
| if (!label_file.good()) { |
| LOG(WARNING) << "Failed to open file: " << label_file_path; |
| continue; |
| } |
| std::string label = {std::istreambuf_iterator<char>(label_file), |
| std::istreambuf_iterator<char>()}; |
| label = std::string(absl::StripTrailingAsciiWhitespace(label)); |
| |
| if (std::find(IntelCpuSensor::kHiddenProperties.begin(), |
| IntelCpuSensor::kHiddenProperties.end(), |
| label) != IntelCpuSensor::kHiddenProperties.end()) { |
| continue; |
| } |
| |
| label_to_input_file[label] = input_path.string(); |
| } |
| } |
| return label_to_input_file; |
| } |
| |
| void IntelCpuSensor::ParseThresholdsFromHwmonFiles() { |
| if (absl::StatusOr<std::tuple<std::string, std::string, std::string>> |
| input_file_parts = SplitInputFile(input_dev_path_); |
| input_file_parts.ok()) { |
| auto& [prefix, number, suffix] = *input_file_parts; |
| if (kThresholdAttributesMap.contains(suffix)) { |
| ThresholdConfigs new_threshold_configs; |
| bool has_valid_threshold = false; |
| for (const auto& [attribute, threshold_type] : |
| kThresholdAttributesMap.at(suffix)) { |
| ThresholdConfig* threshold_config = |
| new_threshold_configs.add_threshold_configs(); |
| threshold_config->set_type(threshold_type); |
| boost::filesystem::path attribute_file_path = |
| boost::filesystem::path(input_dev_path_).parent_path() / |
| absl::StrCat(prefix, number, "_", attribute); |
| std::ifstream attr_file(attribute_file_path.string()); |
| if (!attr_file.good()) { |
| LOG(INFO) << "Failed to open file: " << attribute_file_path; |
| continue; |
| } |
| std::string attr_value_str = {std::istreambuf_iterator<char>(attr_file), |
| std::istreambuf_iterator<char>()}; |
| double value = 0; |
| if (!absl::SimpleAtod(attr_value_str, &value)) { |
| LOG(INFO) << "Failed to convert value: " << attr_value_str |
| << " to double"; |
| continue; |
| } |
| double offset = (attribute == "crit") ? dts_offset_ : 0; |
| value = value / IntelCpuSensor::kScaleFactor + offset; |
| threshold_config->set_value(value); |
| // If at least one of the thresholds has a valid value, we should update |
| // this sensor's thresholds. |
| has_valid_threshold = true; |
| LOG(INFO) << "IntelCPUSensor parsed threshold: " << attribute_file_path |
| << " " << attribute << " " << threshold_type << " " << value; |
| } |
| |
| if (has_valid_threshold) { |
| UpdateThresholds(new_threshold_configs); |
| } |
| } |
| } |
| } |
| |
| absl::Status IntelCpuSensor::ReinitializeInternal() { |
| if (IsInputDeviceUsable()) { |
| return absl::FailedPreconditionError( |
| absl::StrCat("Attempted to reinitialize usable sensor: ", sensor_name_, |
| ". Ignoring reinitialization.")); |
| } |
| |
| ECCLESIA_ASSIGN_OR_RETURN(boost::filesystem::path peci_device_path, |
| PeciHwmonBasedSensor::CreateDevice( |
| sensor_attributes_static_.hal_common_config())); |
| |
| absl::flat_hash_map<std::string, std::string> label_to_input_file = |
| CreateLabelToInputFileMap(peci_device_path, |
| sensor_attributes_static_.hal_common_config()); |
| |
| auto it = label_to_input_file.find(sensor_label_); |
| std::string error_message = |
| absl::StrCat("Failed to find ", sensor_label_, " in hwmon folder ", |
| peci_device_path.string()); |
| if (it == label_to_input_file.end()) { |
| LOG(WARNING) << error_message; |
| // Set state to CREATION_FAILED with error message to indicate that the |
| // sensor was attempted to be created but failed. |
| return absl::NotFoundError(error_message); |
| } |
| |
| SetInputDevicePath(it->second); |
| |
| ParseThresholdsFromHwmonFiles(); |
| |
| // Set to Status UNKNOWN to simulate fresh creation of Sensor object. |
| State state; |
| state.set_status(STATUS_UNKNOWN); |
| state.set_status_message("Reinitialized, pending sensor refresh"); |
| UpdateState(std::move(state)); |
| |
| // Set up the input device file for reading. This must be done last as right |
| // after this function is done, the sensor is readable. |
| SetUpInput(); |
| |
| LOG(INFO) << "Successfully reinitialized IntelCpuSensor: " << sensor_name_; |
| |
| return absl::OkStatus(); |
| } |
| |
| void IntelCpuSensor::ReinitializeWithRetry( |
| absl::AnyInvocable<void(absl::Status)> callback, uint32_t attempts_left) { |
| absl::Status status = ReinitializeInternal(); |
| if (status.ok()) { |
| callback(status); |
| return; |
| } |
| |
| LOG(WARNING) << "Failed to reinitialize IntelCpuSensor: " << sensor_name_ |
| << " with status: " << status |
| << ". Attempts left: " << attempts_left - 1; |
| |
| if (attempts_left < 1) { |
| State state; |
| state.set_status(STATUS_CREATION_FAILED); |
| state.set_status_message( |
| absl::StrCat("Failed to reinitialize IntelCpuSensor after 3 attempts. " |
| "Last status: ", |
| status.message())); |
| UpdateState(std::move(state)); |
| callback(status); |
| return; |
| } |
| |
| retry_timer_.expires_after(std::chrono::seconds( |
| retry_delays_[retry_delays_.size() - attempts_left])); |
| retry_timer_.async_wait([this, callback = std::move(callback), attempts_left]( |
| const boost::system::error_code& ec) mutable { |
| if (ec) { |
| LOG(ERROR) << "Retry timer error: " << ec.message(); |
| callback(absl::InternalError(absl::StrCat( |
| "Retry timer error while reinitializing IntelCpuSensor: ", |
| ec.message()))); |
| return; |
| } |
| ReinitializeWithRetry(std::move(callback), attempts_left - 1); |
| }); |
| } |
| |
| void IntelCpuSensor::Reinitialize( |
| absl::AnyInvocable<void(absl::Status)> callback) { |
| boost::asio::post(*io_context_, |
| [this, callback = std::move(callback)]() mutable { |
| ReinitializeWithRetry(std::move(callback), 3); |
| }); |
| } |
| |
| IntelCpuSensor::IntelCpuSensor( |
| Token token, IntelCpuSensorType sensor_type, SensorUnit sensor_unit, |
| const std::string& input_dev_path, const std::string& sensor_name, |
| const std::string& sensor_label, const HalCommonConfig& hal_common_config, |
| const ThresholdConfigs& threshold_configs, |
| const ReadingRangeConfigs& reading_range_configs, |
| const ReadingTransformConfig& reading_transform_config, |
| const EntityCommonConfig& entity_common_config, const double dts_offset, |
| const std::shared_ptr<boost::asio::io_context>& io_context, |
| std::optional<NotificationCb> on_batch_notify, const PeciSysfs& peci_sysfs) |
| : PeciHwmonBasedSensor( |
| input_dev_path, io_context, |
| CreateStaticAttributes(sensor_name, sensor_unit, hal_common_config, |
| entity_common_config, reading_range_configs, |
| reading_transform_config), |
| threshold_configs, std::move(on_batch_notify), peci_sysfs), |
| sensor_type_(sensor_type), |
| sensor_name_(sensor_name), |
| sensor_label_(sensor_label), |
| dts_offset_(dts_offset), |
| retry_timer_(*io_context) {} |
| |
| IntelCpuSensor::IntelCpuSensor( |
| const PeciSysfs& peci_sysfs, const IntelCpuSensorConfig& config, |
| const std::string& sensor_label, |
| const std::shared_ptr<boost::asio::io_context>& io_context, |
| const std::vector<uint32_t>& retry_delays) |
| : PeciHwmonBasedSensor("", io_context, |
| CreateStaticAttributes( |
| "", SensorUnit::UNIT_UNKNOWN, |
| config.hal_common_config(), EntityCommonConfig(), |
| ReadingRangeConfigs(), ReadingTransformConfig()), |
| ThresholdConfigs(), std::nullopt, peci_sysfs), |
| retry_delays_(retry_delays), |
| sensor_label_(sensor_label), |
| retry_timer_(*io_context) {} |
| |
| } // namespace milotic_tlbmc |