// blob: 0dcdf241b1118bd14d01e96a6a465311e5de1164 [file] [log] [blame]
#include "tlbmc/collector/peci_scanner.h"
#include <array>
#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <ios>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/functional/any_invocable.h"
#include "absl/log/log.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "boost/asio.hpp" //NOLINT: boost::asio is commonly used in BMC
#include "fan_controller_config.pb.h"
#include "fan_pwm_config.pb.h"
#include "fan_tach_config.pb.h"
#include "hwmon_temp_sensor_config.pb.h"
#include "intel_cpu_sensor_config.pb.h"
#include "psu_sensor_config.pb.h"
#include "shared_mem_sensor_config.pb.h"
#include "virtual_sensor_config.pb.h"
#include "tlbmc/hal/peci/peci_access_interface.h"
#include "tlbmc/hal/sysfs/peci.h"
#include "resource.pb.h"
#include "sensor.pb.h"
#include "tlbmc/scheduler/scheduler.h"
#include "tlbmc/sensors/intel_cpu_sensor.h"
#include "hal/peci/linux/peci-ioctl.h"
#include "peci.h"
namespace milotic_tlbmc {
// Constructs a scanner over the given Intel CPU sensor configs.
//
// Args:
//   intel_cpu_sensor_configs: view of the CPU sensor configs to scan.
//     absl::Span does not own its elements.
//     NOTE(review): if the member is also a span, the caller must keep the
//     underlying configs alive for the lifetime of the scanner - confirm
//     against the header.
//   peci_sysfs: sysfs helper used to locate PECI paths and devices.
//   peci_access: PECI access layer used to talk to the CPU directly.
//   task_scheduler: scheduler used to re-run the scan after a delay. Not
//     checked for null here.
//   callback: invoked with (board config key, sensor name) for every sensor
//     once the CPU is found to be ready.
PeciScanner::PeciScanner(
    absl::Span<const IntelCpuSensorConfig> intel_cpu_sensor_configs,
    const PeciSysfs& peci_sysfs, const PeciAccessInterface& peci_access,
    TaskScheduler* task_scheduler,
    absl::AnyInvocable<void(const std::string&, const std::string&) const>
        callback)
    // A span is a cheap-to-copy view; std::move on it would only be a
    // misleading no-op cast, so it is copied directly.
    : intel_cpu_sensor_configs_(intel_cpu_sensor_configs),
      peci_sysfs_(peci_sysfs),
      peci_access_(peci_access),
      task_scheduler_(task_scheduler),
      reinitialize_callback_(std::move(callback)) {}
// Factory for PeciScanner: forwards every argument to the constructor and
// hands the freshly allocated scanner back as a std::unique_ptr.
std::unique_ptr<PeciScanner> PeciScanner::Create(
    absl::Span<const IntelCpuSensorConfig> intel_cpu_sensor_configs,
    const PeciSysfs& peci_sysfs, const PeciAccessInterface& peci_access,
    TaskScheduler* task_scheduler,
    absl::AnyInvocable<void(const std::string&, const std::string&) const>
        callback) {
  PeciScanner* scanner =
      new PeciScanner(intel_cpu_sensor_configs, peci_sysfs, peci_access,
                      task_scheduler, std::move(callback));
  // Ownership of the raw pointer is taken over immediately.
  return absl::WrapUnique(scanner);
}
// Kicks off a PECI device scan for every configured Intel CPU sensor.
//
// Each config gets its own PeciScanContext (initially OFF with a zero rescan
// delay) and is scanned once right away; AttemptPeciDeviceScan takes care of
// any follow-up rescans.
void PeciScanner::StartScan() {
  for (const auto& cpu_config : intel_cpu_sensor_configs_) {
    const auto& hal_config = cpu_config.hal_common_config();
    LOG(INFO) << "Setting up PeciDeviceScan for " << cpu_config.name() << " at "
              << hal_config.bus() << " " << hal_config.address();

    // The new context is appended, so its index is the current size.
    const size_t context_index = GetPeciScanContextsSize();
    AddPeciScanContext(PeciScanContext{
        .rescan_delay_seconds = 0, .cpu_state = IntelCpuSensor::CpuState::OFF});

    AttemptPeciDeviceScan(cpu_config, context_index);
  }
}
void PeciScanner::AttemptPeciDeviceScan(const IntelCpuSensorConfig& config,
size_t scan_context_idx) {
// The logic for determining if the CPU and DIMMs are ready is based on the
// implementation at
// https://github.com/openbmc/dbus-sensors/blob/master/src/intel-cpu/IntelCPUSensorMain.cpp#L491
// A diagram is provided at:
// https://docs.google.com/drawings/d/19kGXSh7Sa7_Xd_dEzR2EYsG1lC7o4t8p7FqtBUHGfws/edit?resourcekey=0-A7KASgf41b6R0C-9Oy_DCQ
LOG(INFO) << "Attempting PeciDeviceScan for " << config.name() << " at "
<< config.hal_common_config().bus() << " "
<< config.hal_common_config().address();
PeciScanContext scan_context = GetPeciScanContext(scan_context_idx);
// Attempt to check if the peci rescan path is able to be opened, if it is
// we can verify status by checking for the hwmon files directly. Otherwise,
// use peci interface to verify CPU/DIMM state.
std::fstream rescan{peci_sysfs_.GetPeciRescanPath().string(), std::ios::out};
if (rescan.is_open()) {
UpdateCpuStateFromSysfs(config, scan_context, rescan);
} else {
UpdateCpuStateFromPeciInterface(config, scan_context);
}
// If the CPU is ready, we should reinitialize the sensors, this will stop
// rescheduling this task.
if (scan_context.cpu_state == IntelCpuSensor::CpuState::READY) {
for (const auto& [label, name] : config.label_to_name()) {
std::string sensor_name = IntelCpuSensor::CreateSensorName(
name.empty() ? label : name, config.cpu_id());
reinitialize_callback_(config.entity_common_config().board_config_key(),
sensor_name);
}
LOG(INFO) << "All Peci sensors reinitialized, ending PeciDeviceScan task";
}
UpdatePeciScanContext(scan_context_idx, scan_context);
ReschedulePeciDeviceScan(config, scan_context_idx);
}
void PeciScanner::UpdateCpuStateFromSysfs(const IntelCpuSensorConfig& config,
PeciScanContext& scan_context,
std::fstream& rescan) {
LOG(INFO) << "Peci rescan path is open, using Sysfs to check CPU state.";
boost::filesystem::path device_path =
peci_sysfs_.GetBusPath(config.hal_common_config().bus()) /
peci_sysfs_.GetDeviceDirectoryName(config.hal_common_config());
// Check for DIMM temp files
if (!IntelCpuSensor::FindFiles(device_path, *IntelCpuSensor::kDimmTempRegex,
3)
.empty()) {
scan_context.cpu_state = IntelCpuSensor::CpuState::READY;
scan_context.rescan_delay_seconds = 5;
return;
}
// Check for CPU temp files
if (!IntelCpuSensor::FindFiles(device_path, *IntelCpuSensor::kCpuTempRegex, 3)
.empty()) {
scan_context.cpu_state = IntelCpuSensor::CpuState::ON;
scan_context.rescan_delay_seconds = 3;
return;
}
// If DIMM and CPU temp files are not found, the device is not ready,
// use the peci rescan interface to rescan all Peci devices.
// https://www.kernel.org/doc/html/latest/admin-guide/abi-testing.html#abi-sys-bus-peci-rescan
// Rescan already opened to reach this branch.
rescan << "1";
scan_context.cpu_state = IntelCpuSensor::CpuState::OFF;
scan_context.rescan_delay_seconds = 30;
}
bool PeciScanner::CheckDimmsReady(const IntelCpuSensorConfig& config) {
for (unsigned int rank = 0; rank < IntelCpuSensor::kRankNumMax; rank++) {
absl::StatusOr<std::array<uint8_t, 8>> pkg_config_status =
peci_access_.RdPkgConfig(config.hal_common_config().address(),
PECI_MBX_INDEX_DDR_DIMM_TEMP, rank, 4);
if (!pkg_config_status.ok()) {
// Cannot read package config, assume not ready
return false;
}
std::array<uint8_t, 8> pkg_config = *pkg_config_status;
if (((pkg_config[0] != 0xFF) && (pkg_config[0] != 0U)) ||
((pkg_config[1] != 0xFF) && (pkg_config[1] != 0U))) {
// At least one DIMM is present, this means ready
return true;
}
}
// No DIMMs detected
return false;
}
bool PeciScanner::EnsurePeciDeviceExists(const IntelCpuSensorConfig& config) {
absl::StatusOr<std::array<uint8_t, 8>> pkg_config = peci_access_.RdPkgConfig(
config.hal_common_config().address(), PECI_MBX_INDEX_CPU_ID, 0, 4);
if (!pkg_config.ok()) {
LOG(ERROR) << "Failed to read package config for CPU ID at address: "
<< config.hal_common_config().address();
return false;
}
if (!peci_sysfs_.IsDevicePresent(config.hal_common_config()) &&
!peci_sysfs_.NewDevice(config.hal_common_config(), "peci-client").ok()) {
LOG(ERROR) << "Failed to create new PECI device at "
<< config.hal_common_config().bus() << " "
<< config.hal_common_config().address();
return false;
}
return true;
}
void PeciScanner::UpdateCpuStateFromPeciInterface(
const IntelCpuSensorConfig& config, PeciScanContext& scan_context) {
LOG(INFO) << "Peci rescan path cannot be opened, using Peci Interface to "
"check CPU state.";
std::string peci_dev_path =
peci_sysfs_.GetPeciDevicePath(config.hal_common_config().bus()).string();
peci_access_.SetDevName(peci_dev_path);
absl::StatusOr<int> peci_fd = peci_access_.Lock(PECI_NO_WAIT);
if (!peci_fd.ok()) {
LOG(ERROR) << "Unable to open " << peci_dev_path << " " << strerror(errno);
scan_context.rescan_delay_seconds = 30;
return;
}
IntelCpuSensor::CpuState new_state = IntelCpuSensor::CpuState::OFF;
if (!peci_access_.Ping(config.hal_common_config().address()).ok()) {
scan_context.cpu_state = IntelCpuSensor::CpuState::OFF;
scan_context.rescan_delay_seconds = 30;
peci_access_.Unlock(*peci_fd);
return;
}
if (CheckDimmsReady(config)) {
new_state = IntelCpuSensor::CpuState::READY;
} else {
new_state = IntelCpuSensor::CpuState::ON;
}
if (scan_context.cpu_state == new_state) {
// No state change
peci_access_.Unlock(*peci_fd);
return;
}
// State transition logic
if (new_state == IntelCpuSensor::CpuState::ON) {
if (scan_context.cpu_state == IntelCpuSensor::CpuState::OFF) {
if (!EnsurePeciDeviceExists(config)) {
// Unable to read the CPU package config or failed to reinitialize the
// new device, assume it is off and try again later.
new_state = IntelCpuSensor::CpuState::OFF;
scan_context.rescan_delay_seconds = 30;
} else {
// The CPU is reinitialized or is on, but DIMMs not ready yet, try again
// in 3 seconds.
scan_context.rescan_delay_seconds = 3;
}
}
} else if (new_state == IntelCpuSensor::CpuState::READY) {
if (scan_context.cpu_state == IntelCpuSensor::CpuState::OFF) {
if (!EnsurePeciDeviceExists(config)) {
// DIMMs were ready, but now CPU is off or not detectable, assume it has
// powered off and try again in 30 seconds.
new_state = IntelCpuSensor::CpuState::OFF;
scan_context.rescan_delay_seconds = 30;
} else {
// DIMMs are ready, we will reinitialize sensors.
scan_context.rescan_delay_seconds = 5;
}
} else {
// Coming from ON, DIMMs are ready, we will reinitialize sensors.
scan_context.rescan_delay_seconds = 5;
}
DLOG(INFO) << "DIMMs are detected at: " << config.hal_common_config().bus()
<< " " << config.hal_common_config().address();
}
scan_context.cpu_state = new_state;
peci_access_.Unlock(*peci_fd);
}
void PeciScanner::ReschedulePeciDeviceScan(const IntelCpuSensorConfig& config,
size_t scan_context_idx) {
if (GetPeciScanContext(scan_context_idx).cpu_state ==
IntelCpuSensor::CpuState::READY) {
return;
}
task_scheduler_->ScheduleOneShotAsync(
[this, config, scan_context_idx](absl::AnyInvocable<void()> on_done) {
AttemptPeciDeviceScan(config, scan_context_idx);
on_done();
},
absl::Seconds(GetPeciScanContext(scan_context_idx).rescan_delay_seconds));
}
} // namespace milotic_tlbmc