// blob: abf760d699f1ca8f449409e5429e3168f6cef2b2 [file] [log] [blame]
#include "tlbmc/store/store_impl.h"

#include <cstddef>
#include <filesystem>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/functional/any_invocable.h"
#include "absl/log/log.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "g3/macros.h"
#include "nlohmann/json.hpp"
#include "tlbmc/central_config/config.h"
#include "tlbmc/collector/collector.h"
#include "tlbmc/collector/fru_collector.h"
#include "tlbmc/collector/gpio_collector.h"
#include "tlbmc/collector/metric_collector.h"
#include "tlbmc/collector/sensor_collector.h"
#include "ad_hoc_fru_config.pb.h"
#include "tlbmc/configs/entity_config.h"
#include "software_metrics_config.pb.h"
#include "topology_config.pb.h"
#include "tlbmc/hal/fru_scanner.h"
#include "tlbmc/hal/peci/peci_access_impl.h"
#include "tlbmc/hal/peci/peci_access_interface.h"
#include "fru.pb.h"
#include "sensor.pb.h"
#include "software_metrics.pb.h"
#include "tlbmc/scheduler/scheduler.h"
#include "tlbmc/sensors/sensor.h"
#include "tlbmc/store/store.h"
#include "tlbmc/trace/tracer.h"
#include "router_interface.h"
namespace milotic_tlbmc {
using ::crow::RouterInterface;
namespace {
// Interval, in milliseconds, between periodic store dumps. StoreImpl::Create
// falls back to this value when `override_store_snapshot_interval_ms` is not
// set in the options.
constexpr int kDefaultPeriodicDumpIntervalMs = 1000 * 60 * 60; // 1 hour
} // namespace
// Applies `config` to the collector selected by `type`.
//
// Only Collector::Type::kSensor is handled; every other collector type is
// rejected with InvalidArgumentError.
absl::Status StoreImpl::ConfigureCollection(const Collector::Config& config,
                                            Collector::Type type) const {
  if (type != Collector::Type::kSensor) {
    return absl::InvalidArgumentError("Unsupported collector type");
  }
  return all_collectors_.sensor_collector->ConfigureCollection(config);
}
// Returns every sensor reported by the sensor collector.
std::vector<std::shared_ptr<const Sensor>> StoreImpl::GetAllSensors() const {
  const auto& sensors = all_collectors_.sensor_collector;
  return sensors->GetAllSensors();
}
// Forwards the query for `config_key` to the entity config and returns its
// answer unchanged.
bool StoreImpl::IsConfigKeyOwningAllSensors(
    absl::string_view config_key) const {
  const auto& entity_config = entity_config_;
  return entity_config->IsConfigKeyOwningAllSensors(config_key);
}
// Looks up a sensor by `sensor_key` through the sensor collector and returns
// whatever the collector returns for that key.
std::shared_ptr<const Sensor> StoreImpl::GetSensorBySensorKey(
    const std::string& sensor_key) const {
  const auto& sensors = all_collectors_.sensor_collector;
  return sensors->GetSensorBySensorKey(sensor_key);
}
// Returns the sensor keys the sensor collector associates with
// `board_config_key`.
std::vector<std::string> StoreImpl::GetAllSensorKeysByConfigKey(
    const std::string& board_config_key) const {
  const auto& sensors = all_collectors_.sensor_collector;
  return sensors->GetAllSensorKeysByConfigKey(board_config_key);
}
// Looks up a sensor by the (`board_config_key`, `sensor_key`) pair through the
// sensor collector.
std::shared_ptr<const Sensor> StoreImpl::GetSensorByConfigKeyAndSensorKey(
    const std::string& board_config_key, const std::string& sensor_key) const {
  const auto& sensors = all_collectors_.sensor_collector;
  return sensors->GetSensorByConfigKeyAndSensorKey(board_config_key,
                                                   sensor_key);
}
// Writes `value` to the sensor identified by `sensor_key` via the sensor
// collector and returns the collector's status.
absl::Status StoreImpl::WriteToSensor(const std::string& sensor_key,
                                      const SensorValue& value) {
  auto& sensors = all_collectors_.sensor_collector;
  return sensors->WriteToSensor(sensor_key, value);
}
// Resolves the devpath of the topology node that `sensor` relates to.
//
// Steps:
//   1. Read the related item id from the sensor's static attributes.
//   2. If the related item is a fan, processor or DIMM, translate that id into
//      the real related item key through the matching children map of the
//      sensor's parent board (several sensors may share one related item id).
//   3. Look up the topology node for the resulting key and return its devpath.
//
// Returns:
//   - InvalidArgumentError if `sensor` is null.
//   - NotFoundError if the id is missing from the parent board's children map.
//   - Any error produced by GetFruTopology.
absl::StatusOr<const std::string&> StoreImpl::GetDevpathFromSensor(
    std::shared_ptr<const Sensor> sensor) const {
  if (sensor == nullptr) {
    return absl::InvalidArgumentError("Sensor is null");
  }

  // Keep the attributes bound to a named reference: `related_item_key` is a
  // view into them and must not outlive them.
  const SensorAttributesStatic& static_attributes =
      sensor->GetSensorAttributesStatic();
  const auto& common_config = static_attributes.entity_common_config();
  const auto related_item_type = common_config.related_item().type();
  absl::string_view related_item_key = common_config.related_item().id();

  const bool resolve_through_parent_board =
      related_item_type == RESOURCE_TYPE_FAN ||
      related_item_type == RESOURCE_TYPE_PROCESSOR ||
      related_item_type == RESOURCE_TYPE_DIMM;
  if (resolve_through_parent_board) {
    ECCLESIA_ASSIGN_OR_RETURN(
        const TopologyConfigNode* parent_board,
        GetFruTopology(common_config.board_config_key()));

    // Replaces `related_item_key` with the value mapped to it in `children`.
    // The id should always be present in the parent board's children map;
    // `not_found_format` takes $0 = id and $1 = parent board FRU key.
    const auto resolve_child =
        [&](const auto& children,
            absl::string_view not_found_format) -> absl::Status {
      auto child_it = children.find(related_item_key);
      if (child_it == children.end()) {
        return absl::NotFoundError(
            absl::Substitute(not_found_format, related_item_key,
                             parent_board->fru_info().fru_key()));
      }
      related_item_key = child_it->second;
      return absl::OkStatus();
    };

    if (related_item_type == RESOURCE_TYPE_FAN) {
      ECCLESIA_RETURN_IF_ERROR(
          resolve_child(parent_board->children_fans(),
                        "Fan $0 not found in parent board $1"));
    } else if (related_item_type == RESOURCE_TYPE_PROCESSOR) {
      ECCLESIA_RETURN_IF_ERROR(
          resolve_child(parent_board->children_processors(),
                        "Processor $0 not found in parent board $1"));
    } else {
      ECCLESIA_RETURN_IF_ERROR(
          resolve_child(parent_board->children_dimms(),
                        "DIMM $0 not found in parent board $1"));
    }
  }

  ECCLESIA_ASSIGN_OR_RETURN(const TopologyConfigNode* topology_node,
                            GetFruTopology(related_item_key));
  return topology_node->location_context().devpath();
}
// Returns the entity config's lookup result for the FRU identified by `key`.
absl::StatusOr<const Fru*> StoreImpl::GetFru(absl::string_view key) const {
  const auto& entity_config = entity_config_;
  return entity_config->GetFru(key);
}
// Returns the FRU table exposed by the entity config.
absl::StatusOr<const FruTable*> StoreImpl::GetAllFrus() const {
  const auto& entity_config = entity_config_;
  return entity_config->GetAllFrus();
}
// Returns the software metric values reported by the metric collector.
SoftwareMetricsValue StoreImpl::GetMetricValues() const {
  const auto& metrics = all_collectors_.metric_collector;
  return metrics->GetMetricValues();
}
// Returns the socket stat metric values reported by the metric collector.
SocketStatStates StoreImpl::GetMetricSocketStatValues() const {
  const auto& metrics = all_collectors_.metric_collector;
  return metrics->GetSocketStatMetricsValues();
}
// Returns the net filter values reported by the metric collector.
NetFilterStates StoreImpl::GetMetricNetFilterValues() const {
  const auto& metrics = all_collectors_.metric_collector;
  return metrics->GetNetFilterValues();
}
// Builds a JSON snapshot of the store: one entry per collector ("Sensor",
// "Fru", "Metric", "Gpio") plus the entity config under "EntityConfig".
nlohmann::json StoreImpl::ToJson() const {
  const auto& collectors = all_collectors_;
  nlohmann::json snapshot;
  snapshot["Sensor"] = collectors.sensor_collector->ToJson();
  snapshot["Fru"] = collectors.fru_collector->ToJson();
  snapshot["Metric"] = collectors.metric_collector->ToJson();
  snapshot["Gpio"] = collectors.gpio_collector->ToJson();
  snapshot["EntityConfig"] = entity_config_->ToJson();
  return snapshot;
}
// Collects scheduler statistics as JSON: one entry per collector plus the
// store's own task scheduler under "Store".
nlohmann::json StoreImpl::GetSchedulerStats() const {
  const auto& collectors = all_collectors_;
  nlohmann::json stats;
  stats["SensorCollector"] = collectors.sensor_collector->GetSchedulerStats();
  stats["FruCollector"] = collectors.fru_collector->GetSchedulerStats();
  stats["MetricCollector"] = collectors.metric_collector->GetSchedulerStats();
  stats["GpioCollector"] = collectors.gpio_collector->GetSchedulerStats();
  stats["Store"] = task_scheduler_->ToJson();
  return stats;
}
// Returns a copy of the metrics recorded for this store (populated from the
// bootup metrics handed to the constructor).
Store::Metrics StoreImpl::GetMetrics() const {
  return metrics_;
}
// Returns the entity config's topology node lookup result for `config_key`.
absl::StatusOr<const TopologyConfigNode*> StoreImpl::GetFruTopology(
    absl::string_view config_key) const {
  const auto& entity_config = entity_config_;
  return entity_config->GetFruTopologyByConfig(config_key);
}
// Returns the topology config exposed by the entity config.
absl::StatusOr<const TopologyConfig*> StoreImpl::GetTopologyConfig() const {
  const auto& entity_config = entity_config_;
  return entity_config->GetTopologyConfig();
}
// Returns all config keys reported by the entity config.
absl::StatusOr<std::vector<std::string>> StoreImpl::GetAllConfigKeys() const {
  const auto& entity_config = entity_config_;
  return entity_config->GetAllConfigKeys();
}
// Maps `fru_key` to its config key using the entity config.
absl::StatusOr<std::string> StoreImpl::GetConfigKeyByFruKey(
    absl::string_view fru_key) const {
  const auto& entity_config = entity_config_;
  return entity_config->GetConfigKeyByFruKey(fru_key);
}
// Maps `config_key` to its FRU key using the entity config.
absl::StatusOr<std::string> StoreImpl::GetFruKeyByConfigKey(
    absl::string_view config_key) const {
  const auto& entity_config = entity_config_;
  return entity_config->GetFruKeyByConfigKey(config_key);
}
// Returns the fan info (a list of string pairs) that the entity config holds
// for `config_name`.
absl::StatusOr<std::vector<std::pair<std::string, std::string>>>
StoreImpl::GetFanInfoByConfigKey(absl::string_view config_name) const {
  const auto& entity_config = entity_config_;
  return entity_config->GetFanInfoByConfigKey(config_name);
}
// Builds a fully wired StoreImpl from `options`.
//
// Steps, in order:
//   1. Validate the required options and load the proto and entity configs.
//   2. If the FRU collector module is enabled: create the FRU collector, build
//      the EntityConfig from the scanned FRU table and, if the sensor
//      collector module is also enabled, create the sensor collector.
//   3. Create the metric and GPIO collectors when their modules are enabled.
//   4. Set up ad hoc FRU scanning and construct the store.
//
// Collectors whose module is disabled stay as their Empty* placeholders.
// Bootup timings are recorded in the store's Metrics and as Tracer datapoints.
//
// Returns InvalidArgumentError when a required option is missing, otherwise
// the status of the first step that fails. When the EntityConfig or the sensor
// collector cannot be created, the cached FRU table file is removed (best
// effort) so that the cache is rebuilt on the next boot.
absl::StatusOr<std::unique_ptr<StoreImpl>> StoreImpl::Create(
    Options&& options) {
  Metrics metrics_at_bootup;
  absl::Time create_start_time = absl::Now();

  if (options.fru_scanners.empty()) {
    return absl::InvalidArgumentError("FruScanners is empty");
  }
  if (options.i2c_sysfs == nullptr) {
    return absl::InvalidArgumentError("I2cSysfs is null");
  }
  if (options.i3c_sysfs == nullptr) {
    return absl::InvalidArgumentError("I3cSysfs is null");
  }
  if (options.peci_sysfs == nullptr) {
    return absl::InvalidArgumentError("PeciSysfs is null");
  }
  if (options.proto_parser == nullptr) {
    return absl::InvalidArgumentError("Proto config parser is null");
  }
  // The entity config reader is dereferenced unconditionally below.
  if (options.entity_config_reader == nullptr) {
    return absl::InvalidArgumentError("EntityConfigReader is null");
  }

  options.proto_parser->LoadProtoConfigs();

  absl::Time parse_configs_start_time = absl::Now();
  Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Parse-Configs-Begin",
                                           parse_configs_start_time);
  ECCLESIA_RETURN_IF_ERROR(
      options.entity_config_reader->LoadEntityConfig(options.config_location));
  std::vector<AdHocFruConfig> ad_hoc_fru_scanning_configs =
      FruCollector::Options::ParseAdHocFruConfigs(
          options.entity_config_reader->GetConfig());
  absl::Time parse_configs_end_time = absl::Now();
  metrics_at_bootup.config_parse_duration =
      parse_configs_end_time - parse_configs_start_time;
  Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Parse-Configs-End",
                                           parse_configs_end_time);

  size_t ad_hoc_fru_count = ad_hoc_fru_scanning_configs.size();

  // The FRU collector only borrows the scanners; `options` keeps ownership.
  absl::flat_hash_map<AdHocScannerType, FruScanner*> fru_scanners_raw;
  fru_scanners_raw.reserve(options.fru_scanners.size());
  for (const auto& [type, scanner] : options.fru_scanners) {
    fru_scanners_raw[type] = scanner.get();
  }
  FruCollector::Options fru_collector_options = {
      .fru_scanners = std::move(fru_scanners_raw),
      .ad_hoc_fru_scanning_configs = std::move(ad_hoc_fru_scanning_configs)};

  AllCollectors all_collectors = {
      .sensor_collector = EmptySensorCollector::Create(),
      .fru_collector = EmptyFruCollector::Create(),
      .metric_collector = EmptyMetricCollector::Create(),
      .gpio_collector = EmptyGpioCollector::Create(),
  };
  std::shared_ptr<EntityConfig> entity_config_shared =
      EmptyEntityConfigImpl::Create();

  if (GetTlbmcConfig().fru_collector_module().enabled()) {
    // If Store is bad, then cache may be bad too. Let cache reload on next
    // boot. Removal is best effort: the non-throwing overload is used so that
    // a filesystem error cannot mask the status being returned to the caller.
    const auto remove_cached_fru_table = [&fru_collector_options]() {
      LOG(ERROR) << "The cached FRU table will be deleted.";
      std::error_code remove_error;
      std::filesystem::remove(fru_collector_options.cached_fru_table_path,
                              remove_error);
      if (remove_error) {
        LOG(ERROR) << "Failed to delete the cached FRU table: "
                   << remove_error.message();
      }
    };

    absl::Time fru_scan_start_time = absl::Now();
    Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Scan-FRUs-Begin",
                                             fru_scan_start_time);
    ECCLESIA_ASSIGN_OR_RETURN(
        std::unique_ptr<FruCollector> fru_collector,
        options.collector_factory->CreateFruCollector(fru_collector_options));
    const RawFruTable fru_table = fru_collector->GetCopyOfCurrentScannedFrus();
    absl::Time fru_scan_end_time = absl::Now();
    metrics_at_bootup.fru_scan_and_collector_create_duration =
        fru_scan_end_time - fru_scan_start_time;
    Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Scan-FRUs-End",
                                             fru_scan_end_time);

    // Load Configs
    absl::Time load_configs_start_time = absl::Now();
    Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Load-Configs-Begin",
                                             load_configs_start_time);
    absl::StatusOr<std::unique_ptr<EntityConfig>> entity_config =
        options.entity_config_reader->CreateEntityConfig(fru_table,
                                                         ad_hoc_fru_count);
    if (!entity_config.ok()) {
      LOG(ERROR) << "Failed to create entity config: "
                 << entity_config.status();
      remove_cached_fru_table();
      return entity_config.status();
    }
    absl::Time load_configs_end_time = absl::Now();
    metrics_at_bootup.topology_config_load_duration =
        load_configs_end_time - load_configs_start_time;
    Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Load-Configs-End",
                                             load_configs_end_time);

    // EntityConfig must be shared as it is used by the FruCollector.
    entity_config_shared = absl::ShareUniquePtr(std::move(*entity_config));
    fru_collector->SetEntityConfig(entity_config_shared);
    all_collectors.fru_collector = std::move(fru_collector);

    if (GetTlbmcConfig().sensor_collector_module().enabled()) {
      // PECI access is only dereferenced when a sensor collector is created,
      // so it is validated here rather than with the unconditional checks.
      if (options.peci_access == nullptr) {
        return absl::InvalidArgumentError("PeciAccess is null");
      }

      // Now create all the collectors one by one.
      absl::Time sensor_collector_create_start_time = absl::Now();
      Tracer::GetInstance().AddOneOffDatapoint(
          "Tlbmc-Create-SensorCollector-Begin",
          sensor_collector_create_start_time);

      SensorCollector::SensorConfigs sensor_configs;
      ECCLESIA_ASSIGN_OR_RETURN(
          sensor_configs.hwmon_temp_sensor_configs,
          entity_config_shared->GetAllHwmonTempSensorConfigs());
      ECCLESIA_ASSIGN_OR_RETURN(sensor_configs.psu_sensor_configs,
                                entity_config_shared->GetAllPsuSensorConfigs());
      ECCLESIA_ASSIGN_OR_RETURN(
          sensor_configs.fan_controller_configs,
          entity_config_shared->GetAllFanControllerConfigs());
      ECCLESIA_ASSIGN_OR_RETURN(sensor_configs.fan_pwm_configs,
                                entity_config_shared->GetAllFanPwmConfigs());
      ECCLESIA_ASSIGN_OR_RETURN(sensor_configs.fan_tach_configs,
                                entity_config_shared->GetAllFanTachConfigs());
      ECCLESIA_ASSIGN_OR_RETURN(
          sensor_configs.shared_mem_sensor_configs,
          entity_config_shared->GetAllSharedMemSensorConfigs());
      ECCLESIA_ASSIGN_OR_RETURN(
          sensor_configs.virtual_sensor_configs,
          entity_config_shared->GetAllVirtualSensorConfigs());
      ECCLESIA_ASSIGN_OR_RETURN(
          sensor_configs.intel_cpu_sensor_configs,
          entity_config_shared->GetAllIntelCpuSensorConfigs());
      ECCLESIA_ASSIGN_OR_RETURN(
          sensor_configs.nic_telemetry_configs,
          entity_config_shared->GetAllNicTelemetryConfigs());

      absl::StatusOr<std::unique_ptr<SensorCollector>> sensor_collector =
          options.collector_factory->CreateSensorCollector(
              {.sensor_configs = std::move(sensor_configs),
               .i2c_sysfs = *options.i2c_sysfs,
               .i3c_sysfs = *options.i3c_sysfs,
               .peci_sysfs = *options.peci_sysfs,
               .peci_access = *options.peci_access,
               .override_sensor_sampling_interval_ms =
                   options.override_sensor_sampling_interval_ms});
      if (!sensor_collector.ok()) {
        LOG(ERROR) << "Failed to create sensor collector: "
                   << sensor_collector.status();
        remove_cached_fru_table();
        return sensor_collector.status();
      }
      absl::Time sensor_collector_create_end_time = absl::Now();
      metrics_at_bootup.sensor_collector_create_duration =
          sensor_collector_create_end_time - sensor_collector_create_start_time;
      Tracer::GetInstance().AddOneOffDatapoint(
          "Tlbmc-Create-SensorCollector-End", sensor_collector_create_end_time);

      all_collectors.sensor_collector = std::move(*sensor_collector);
      entity_config_shared->SetSensorCollector(
          all_collectors.sensor_collector.get());
    }
  }

  // Add support for other collectors here
  if (GetTlbmcConfig().metric_collector_module().enabled()) {
    absl::StatusOr<std::unique_ptr<MetricCollector>> metric_collector;
    MetricCollector::Params params = {
        .metric_configs = options.proto_parser->GetSoftwareMetricsConfig()};
    // A non-empty executor command map selects the unit-test collector.
    if (options.executor_command_map.empty()) {
      metric_collector =
          options.collector_factory->CreateMetricCollector(params);
    } else {
      params.executor_command_map = options.executor_command_map;
      metric_collector = MetricCollector::CreateForUnitTest(params);
    }
    if (!metric_collector.ok()) {
      LOG(ERROR) << "Failed to create metric collector: "
                 << metric_collector.status();
      return metric_collector.status();
    }
    all_collectors.metric_collector = std::move(*metric_collector);
  }

  if (GetTlbmcConfig().gpio_collector_module().enabled()) {
    absl::StatusOr<std::unique_ptr<GpioCollector>> gpio_collector =
        options.collector_factory->CreateGpioCollector(
            {.gpio_configs = options.proto_parser->GetGpioConfigs()});
    if (!gpio_collector.ok()) {
      LOG(ERROR) << "Failed to create GPIO collector: "
                 << gpio_collector.status();
      return gpio_collector.status();
    }
    all_collectors.gpio_collector = std::move(*gpio_collector);
  }

  // Create the store
  int periodic_dump_interval_ms =
      options.override_store_snapshot_interval_ms.value_or(
          kDefaultPeriodicDumpIntervalMs);
  metrics_at_bootup.time_to_ready = absl::Now() - create_start_time;

  // Trigger AdHocFruScanning after every collector has been initialized to
  // prevent race conditions
  all_collectors.fru_collector->SetUpAdHocFruScanning();

  return absl::WrapUnique(new StoreImpl(
      std::move(options), std::move(all_collectors),
      std::move(entity_config_shared),
      absl::Milliseconds(periodic_dump_interval_ms), metrics_at_bootup));
}
// Hands `smart_router` to the entity config.
void StoreImpl::SetSmartRouter(RouterInterface* smart_router) {
  const auto& entity_config = entity_config_;
  entity_config->SetSmartRouter(smart_router);
}
// Takes ownership of the options, collectors and entity config, then starts a
// task on the store's own scheduler that logs a store snapshot, the collector
// scheduler stats and the central config, scheduled with
// `store_snapshot_interval`.
StoreImpl::StoreImpl(Options&& options, AllCollectors all_collectors,
                     std::shared_ptr<EntityConfig> entity_config,
                     absl::Duration store_snapshot_interval,
                     Metrics metrics_at_bootup)
    : options_(std::move(options)),
      all_collectors_(std::move(all_collectors)),
      entity_config_(std::move(entity_config)),
      task_scheduler_(std::make_unique<TaskScheduler>()),
      metrics_(metrics_at_bootup) {
  // The task captures `this`; the destructor stops the scheduler.
  auto log_snapshot = [this](absl::AnyInvocable<void()> on_done) {
    // Dump the store snapshot.
    LOG(WARNING) << "=== Store snapshot ===";
    LOG(WARNING) << this->ToJson().dump();
    LOG(WARNING) << "=== END ===";

    // Dump the collector scheduler stats.
    LOG(WARNING) << "=== Collector Scheduler stats ===";
    LOG(WARNING) << this->GetSchedulerStats().dump();
    LOG(WARNING) << "=== END ===";

    // Dump the central config.
    LOG(WARNING) << "=== Central Config ===";
    LOG(WARNING) << GetTlbmcConfig();
    LOG(WARNING) << "=== END ===";

    on_done();
  };
  task_scheduler_->RunAndScheduleAsync(std::move(log_snapshot),
                                       store_snapshot_interval);
}
// The task scheduler is a member, and the async task started in the
// constructor captures `this` and reads the scheduler itself. The scheduler is
// therefore stopped explicitly here, before the members are destroyed.
StoreImpl::~StoreImpl() {
  task_scheduler_->Stop();
}
// Asks the sensor collector to start collection.
void StoreImpl::StartSensorCollection() {
  auto& sensors = all_collectors_.sensor_collector;
  sensors->StartCollection();
}
} // namespace milotic_tlbmc