| #include "tlbmc/collector/peci_scanner.h" |
| |
| #include <array> |
| #include <cerrno> |
| #include <cstddef> |
| #include <cstdint> |
| #include <cstring> |
| #include <fstream> |
| #include <ios> |
| #include <memory> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "absl/functional/any_invocable.h" |
| #include "absl/log/log.h" |
| #include "absl/memory/memory.h" |
| #include "absl/status/status.h" |
| #include "absl/status/statusor.h" |
| #include "absl/strings/string_view.h" |
| #include "absl/time/time.h" |
| #include "absl/types/span.h" |
| #include "boost/asio.hpp" //NOLINT: boost::asio is commonly used in BMC |
| #include "fan_controller_config.pb.h" |
| #include "fan_pwm_config.pb.h" |
| #include "fan_tach_config.pb.h" |
| #include "hwmon_temp_sensor_config.pb.h" |
| #include "intel_cpu_sensor_config.pb.h" |
| #include "psu_sensor_config.pb.h" |
| #include "shared_mem_sensor_config.pb.h" |
| #include "virtual_sensor_config.pb.h" |
| #include "tlbmc/hal/peci/peci_access_interface.h" |
| #include "tlbmc/hal/sysfs/peci.h" |
| #include "resource.pb.h" |
| #include "sensor.pb.h" |
| #include "tlbmc/scheduler/scheduler.h" |
| #include "tlbmc/sensors/intel_cpu_sensor.h" |
| #include "hal/peci/linux/peci-ioctl.h" |
| #include "peci.h" |
| |
| namespace milotic_tlbmc { |
| |
| // Builds a scanner for the given Intel CPU sensor configs. |
| // |
| // `intel_cpu_sensor_configs` is a non-owning view; presumably the member keeps |
| // the view rather than a copy of the elements, in which case the caller must |
| // keep the underlying configs alive while the scanner runs -- TODO confirm |
| // against the member declaration in the header. |
| // `peci_sysfs` and `peci_access` are taken by reference and `task_scheduler` |
| // by pointer; none of them is copied here. |
| // `callback` is moved into the scanner; AttemptPeciDeviceScan invokes it with |
| // (board config key, sensor name) once a CPU reaches the READY state. |
| PeciScanner::PeciScanner( |
|     absl::Span<const IntelCpuSensorConfig> intel_cpu_sensor_configs, |
|     const PeciSysfs& peci_sysfs, const PeciAccessInterface& peci_access, |
|     TaskScheduler* task_scheduler, |
|     absl::AnyInvocable<void(const std::string&, const std::string&) const> |
|         callback) |
|     // absl::Span is a cheap, trivially copyable view, so std::move on it was |
|     // just a copy; pass it directly. |
|     : intel_cpu_sensor_configs_(intel_cpu_sensor_configs), |
|       peci_sysfs_(peci_sysfs), |
|       peci_access_(peci_access), |
|       task_scheduler_(task_scheduler), |
|       // AnyInvocable is move-only, so this one must be moved. |
|       reinitialize_callback_(std::move(callback)) {} |
| |
| // Factory: heap-allocates a PeciScanner from the given arguments and returns |
| // it wrapped in a std::unique_ptr. `callback` is moved into the new scanner; |
| // all other arguments are forwarded unchanged to the constructor. |
| std::unique_ptr<PeciScanner> PeciScanner::Create( |
|     absl::Span<const IntelCpuSensorConfig> intel_cpu_sensor_configs, |
|     const PeciSysfs& peci_sysfs, const PeciAccessInterface& peci_access, |
|     TaskScheduler* task_scheduler, |
|     absl::AnyInvocable<void(const std::string&, const std::string&) const> |
|         callback) { |
|   // `new` is used directly, presumably because the constructor is not |
|   // reachable from std::make_unique -- confirm in the header. |
|   return std::unique_ptr<PeciScanner>( |
|       new PeciScanner(intel_cpu_sensor_configs, peci_sysfs, peci_access, |
|                       task_scheduler, std::move(callback))); |
| } |
| |
| // For every configured Intel CPU sensor: registers a fresh scan context |
| // (state OFF, rescan delay 0) and immediately runs the first scan attempt. |
| void PeciScanner::StartScan() { |
|   for (const auto& cpu_config : intel_cpu_sensor_configs_) { |
|     const auto& hal_config = cpu_config.hal_common_config(); |
|     LOG(INFO) << "Setting up PeciDeviceScan for " << cpu_config.name() << " at " |
|               << hal_config.bus() << " " << hal_config.address(); |
|     // Read the size before adding: presumably the new context is appended and |
|     // therefore ends up at this index. |
|     const size_t new_context_idx = GetPeciScanContextsSize(); |
|     AddPeciScanContext(PeciScanContext{ |
|         .rescan_delay_seconds = 0, .cpu_state = IntelCpuSensor::CpuState::OFF}); |
|     AttemptPeciDeviceScan(cpu_config, new_context_idx); |
|   } |
| } |
| |
| // Runs one scan pass for `config`: |
| //   1. refreshes a copy of the scan context at `scan_context_idx`, using sysfs |
| //      when the PECI rescan path can be opened and the PECI interface |
| //      otherwise; |
| //   2. if the CPU is READY, invokes the reinitialize callback once per entry in |
| //      the config's label-to-name map; |
| //   3. stores the context back and calls ReschedulePeciDeviceScan. |
| // |
| // The logic for determining if the CPU and DIMMs are ready is based on the |
| // implementation at |
| // https://github.com/openbmc/dbus-sensors/blob/master/src/intel-cpu/IntelCPUSensorMain.cpp#L491 |
| // A diagram is provided at: |
| // https://docs.google.com/drawings/d/19kGXSh7Sa7_Xd_dEzR2EYsG1lC7o4t8p7FqtBUHGfws/edit?resourcekey=0-A7KASgf41b6R0C-9Oy_DCQ |
| void PeciScanner::AttemptPeciDeviceScan(const IntelCpuSensorConfig& config, |
|                                         size_t scan_context_idx) { |
|   const auto& hal_config = config.hal_common_config(); |
|   LOG(INFO) << "Attempting PeciDeviceScan for " << config.name() << " at " |
|             << hal_config.bus() << " " << hal_config.address(); |
| |
|   // Work on a local copy; it is written back with UpdatePeciScanContext below. |
|   PeciScanContext context = GetPeciScanContext(scan_context_idx); |
| |
|   // Try to open the PECI rescan path for writing. When that works, status is |
|   // verified through the hwmon files; when it does not, the PECI interface is |
|   // used to verify CPU/DIMM state. |
|   std::fstream rescan_file{peci_sysfs_.GetPeciRescanPath().string(), |
|                            std::ios::out}; |
|   if (!rescan_file.is_open()) { |
|     UpdateCpuStateFromPeciInterface(config, context); |
|   } else { |
|     UpdateCpuStateFromSysfs(config, context, rescan_file); |
|   } |
| |
|   // A READY CPU gets its sensors reinitialized; ReschedulePeciDeviceScan then |
|   // stops rescheduling this task. |
|   const bool cpu_ready = context.cpu_state == IntelCpuSensor::CpuState::READY; |
|   if (cpu_ready) { |
|     const auto& board_key = config.entity_common_config().board_config_key(); |
|     for (const auto& [label, name] : config.label_to_name()) { |
|       // An empty configured name falls back to the label. |
|       const auto& base_name = name.empty() ? label : name; |
|       const std::string sensor_name = |
|           IntelCpuSensor::CreateSensorName(base_name, config.cpu_id()); |
|       reinitialize_callback_(board_key, sensor_name); |
|     } |
|     LOG(INFO) << "All Peci sensors reinitialized, ending PeciDeviceScan task"; |
|   } |
| |
|   UpdatePeciScanContext(scan_context_idx, context); |
|   ReschedulePeciDeviceScan(config, scan_context_idx); |
| } |
| |
| // Derives the CPU state for `config` from files under its PECI device |
| // directory in sysfs and records it, with a rescan delay, in `scan_context`: |
| //   DIMM temp files found     -> READY, delay 5s |
| //   else CPU temp files found -> ON,    delay 3s |
| //   else                      -> writes "1" to `rescan`, OFF, delay 30s |
| // `rescan` must already be open on the PECI rescan path. |
| void PeciScanner::UpdateCpuStateFromSysfs(const IntelCpuSensorConfig& config, |
|                                           PeciScanContext& scan_context, |
|                                           std::fstream& rescan) { |
|   LOG(INFO) << "Peci rescan path is open, using Sysfs to check CPU state."; |
|   boost::filesystem::path sensor_dir = |
|       peci_sysfs_.GetBusPath(config.hal_common_config().bus()) / |
|       peci_sysfs_.GetDeviceDirectoryName(config.hal_common_config()); |
| |
|   if (!IntelCpuSensor::FindFiles(sensor_dir, *IntelCpuSensor::kDimmTempRegex, 3) |
|            .empty()) { |
|     // DIMM temp files exist. |
|     scan_context.cpu_state = IntelCpuSensor::CpuState::READY; |
|     scan_context.rescan_delay_seconds = 5; |
|   } else if (!IntelCpuSensor::FindFiles(sensor_dir, |
|                                         *IntelCpuSensor::kCpuTempRegex, 3) |
|                   .empty()) { |
|     // Only CPU temp files exist. |
|     scan_context.cpu_state = IntelCpuSensor::CpuState::ON; |
|     scan_context.rescan_delay_seconds = 3; |
|   } else { |
|     // Neither DIMM nor CPU temp files were found, so the device is not ready. |
|     // Ask the kernel to rescan all PECI devices through the rescan interface: |
|     // https://www.kernel.org/doc/html/latest/admin-guide/abi-testing.html#abi-sys-bus-peci-rescan |
|     // The caller already opened `rescan` before taking this path. |
|     rescan << "1"; |
|     scan_context.cpu_state = IntelCpuSensor::CpuState::OFF; |
|     scan_context.rescan_delay_seconds = 30; |
|   } |
| } |
| |
| // Reads the DDR DIMM temperature package config for each rank index below |
| // IntelCpuSensor::kRankNumMax at the config's PECI address. |
| // Returns true as soon as one response has a value other than 0x00/0xFF in |
| // either of its first two bytes (treated as "a DIMM is present"). Returns |
| // false if any read fails or if no rank reports such a value. |
| bool PeciScanner::CheckDimmsReady(const IntelCpuSensorConfig& config) { |
|   const auto reports_dimm = [](uint8_t value) { |
|     return !(value == 0xFF || value == 0U); |
|   }; |
|   const auto cpu_address = config.hal_common_config().address(); |
| |
|   for (unsigned int rank_idx = 0; rank_idx < IntelCpuSensor::kRankNumMax; |
|        ++rank_idx) { |
|     const absl::StatusOr<std::array<uint8_t, 8>> response = |
|         peci_access_.RdPkgConfig(cpu_address, PECI_MBX_INDEX_DDR_DIMM_TEMP, |
|                                  rank_idx, 4); |
|     if (!response.ok()) { |
|       // The package config is unreadable: assume not ready. |
|       return false; |
|     } |
|     const std::array<uint8_t, 8>& bytes = *response; |
|     if (reports_dimm(bytes[0]) || reports_dimm(bytes[1])) { |
|       // At least one DIMM is present, which means ready. |
|       return true; |
|     } |
|   } |
|   // No DIMMs detected on any rank. |
|   return false; |
| } |
| |
| // Checks that the CPU at the config's PECI address answers a CPU ID package |
| // config read and that a matching PECI device is present in sysfs, creating a |
| // "peci-client" device when it is missing. |
| // Returns true when the read succeeded and the device is present or was |
| // created; logs an error and returns false otherwise. |
| bool PeciScanner::EnsurePeciDeviceExists(const IntelCpuSensorConfig& config) { |
|   const auto& hal_config = config.hal_common_config(); |
| |
|   const absl::StatusOr<std::array<uint8_t, 8>> cpu_id_read = |
|       peci_access_.RdPkgConfig(hal_config.address(), PECI_MBX_INDEX_CPU_ID, 0, |
|                                4); |
|   if (!cpu_id_read.ok()) { |
|     LOG(ERROR) << "Failed to read package config for CPU ID at address: " |
|                << hal_config.address(); |
|     return false; |
|   } |
| |
|   // Nothing to create when the device is already there. |
|   if (peci_sysfs_.IsDevicePresent(hal_config)) { |
|     return true; |
|   } |
|   if (peci_sysfs_.NewDevice(hal_config, "peci-client").ok()) { |
|     return true; |
|   } |
| |
|   LOG(ERROR) << "Failed to create new PECI device at " << hal_config.bus() |
|              << " " << hal_config.address(); |
|   return false; |
| } |
| |
| // Determines the CPU state for `config` by talking to the CPU over the PECI |
| // interface (the path taken when the sysfs rescan file cannot be opened) and |
| // updates `scan_context` with the resulting state and rescan delay. |
| // |
| // Outcomes: |
| //   * Lock fails                      -> state untouched, delay 30s |
| //   * Ping fails                      -> OFF, delay 30s |
| //   * detected state == stored state  -> nothing is modified |
| //   * leaving OFF but EnsurePeciDeviceExists fails -> OFF, delay 30s |
| //   * DIMMs detected                  -> READY, delay 5s |
| //   * CPU answers, no DIMMs, was OFF  -> ON, delay 3s |
| //   * CPU answers, no DIMMs, was not OFF -> ON, delay unchanged |
| // The PECI lock taken here is released on every path after a successful Lock. |
| void PeciScanner::UpdateCpuStateFromPeciInterface( |
|     const IntelCpuSensorConfig& config, PeciScanContext& scan_context) { |
|   LOG(INFO) << "Peci rescan path cannot be opened, using Peci Interface to " |
|                "check CPU state."; |
|   std::string peci_dev_path = |
|       peci_sysfs_.GetPeciDevicePath(config.hal_common_config().bus()).string(); |
|   peci_access_.SetDevName(peci_dev_path); |
| |
|   absl::StatusOr<int> peci_fd = peci_access_.Lock(PECI_NO_WAIT); |
|   // Read errno right away so that nothing executed while building the log |
|   // message below can overwrite it. |
|   const int lock_errno = errno; |
|   if (!peci_fd.ok()) { |
|     // Also report the returned status, which describes this failure directly. |
|     LOG(ERROR) << "Unable to open " << peci_dev_path << " " |
|                << std::strerror(lock_errno) << " (" << peci_fd.status() << ")"; |
|     scan_context.rescan_delay_seconds = 30; |
|     return; |
|   } |
| |
|   // Scope guard: calls Unlock when this function returns, whichever return |
|   // statement is taken, instead of repeating the call before each one. |
|   struct ScopedPeciUnlock { |
|     const PeciAccessInterface& access; |
|     int fd; |
|     ~ScopedPeciUnlock() { access.Unlock(fd); } |
|   }; |
|   const ScopedPeciUnlock unlock_on_exit{peci_access_, *peci_fd}; |
| |
|   if (!peci_access_.Ping(config.hal_common_config().address()).ok()) { |
|     // The CPU does not answer: treat it as off and retry later. |
|     scan_context.cpu_state = IntelCpuSensor::CpuState::OFF; |
|     scan_context.rescan_delay_seconds = 30; |
|     return; |
|   } |
| |
|   const bool dimms_ready = CheckDimmsReady(config); |
|   IntelCpuSensor::CpuState new_state = dimms_ready |
|                                            ? IntelCpuSensor::CpuState::READY |
|                                            : IntelCpuSensor::CpuState::ON; |
|   if (scan_context.cpu_state == new_state) { |
|     // No state change. |
|     return; |
|   } |
| |
|   // State transition logic. |
|   const bool was_off = scan_context.cpu_state == IntelCpuSensor::CpuState::OFF; |
|   if (was_off && !EnsurePeciDeviceExists(config)) { |
|     // Unable to read the CPU package config or failed to create the new |
|     // device: assume the CPU is off and try again in 30 seconds. |
|     new_state = IntelCpuSensor::CpuState::OFF; |
|     scan_context.rescan_delay_seconds = 30; |
|   } else if (dimms_ready) { |
|     // DIMMs are ready (coming from OFF or ON), sensors will be reinitialized. |
|     scan_context.rescan_delay_seconds = 5; |
|   } else if (was_off) { |
|     // The CPU is on but DIMMs are not ready yet, try again in 3 seconds. |
|     scan_context.rescan_delay_seconds = 3; |
|   } |
| |
|   if (dimms_ready) { |
|     // Logged whenever DIMMs were detected, even if the device check above |
|     // downgraded the state to OFF. |
|     DLOG(INFO) << "DIMMs are detected at: " << config.hal_common_config().bus() |
|                << " " << config.hal_common_config().address(); |
|   } |
|   scan_context.cpu_state = new_state; |
| } |
| |
| // Schedules another AttemptPeciDeviceScan for `config` after the rescan delay |
| // stored in the scan context at `scan_context_idx`. Does nothing when that |
| // context's CPU state is already READY. |
| void PeciScanner::ReschedulePeciDeviceScan(const IntelCpuSensorConfig& config, |
|                                            size_t scan_context_idx) { |
|   const bool scan_finished = |
|       GetPeciScanContext(scan_context_idx).cpu_state == |
|       IntelCpuSensor::CpuState::READY; |
|   if (!scan_finished) { |
|     // The task holds its own copy of `config`. |
|     // NOTE(review): it also captures `this`, so the scanner presumably has to |
|     // outlive any pending task -- confirm against the scheduler's contract. |
|     auto rescan_task = [this, config, scan_context_idx]( |
|                            absl::AnyInvocable<void()> on_done) { |
|       AttemptPeciDeviceScan(config, scan_context_idx); |
|       on_done(); |
|     }; |
|     const absl::Duration delay = absl::Seconds( |
|         GetPeciScanContext(scan_context_idx).rescan_delay_seconds); |
|     task_scheduler_->ScheduleOneShotAsync(std::move(rescan_task), delay); |
|   } |
| } |
| |
| } // namespace milotic_tlbmc |