#include "tlbmc/store/store_impl.h"

#include <cstddef>
#include <filesystem>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/functional/any_invocable.h"
#include "absl/log/log.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "g3/macros.h"
#include "nlohmann/json.hpp"
#include "tlbmc/central_config/config.h"
#include "tlbmc/collector/collector.h"
#include "tlbmc/collector/fru_collector.h"
#include "tlbmc/collector/gpio_collector.h"
#include "tlbmc/collector/metric_collector.h"
#include "tlbmc/collector/sensor_collector.h"
#include "ad_hoc_fru_config.pb.h"
#include "tlbmc/configs/entity_config.h"
#include "software_metrics_config.pb.h"
#include "topology_config.pb.h"
#include "tlbmc/hal/fru_scanner.h"
#include "tlbmc/hal/peci/peci_access_impl.h"
#include "tlbmc/hal/peci/peci_access_interface.h"
#include "fru.pb.h"
#include "sensor.pb.h"
#include "software_metrics.pb.h"
#include "tlbmc/scheduler/scheduler.h"
#include "tlbmc/sensors/sensor.h"
#include "tlbmc/store/store.h"
#include "tlbmc/trace/tracer.h"
#include "router_interface.h"
| |
| namespace milotic_tlbmc { |
| |
| using ::crow::RouterInterface; |
| |
namespace {

// Default period, in milliseconds, between two store snapshot dumps (1 hour).
// Used when Options::override_store_snapshot_interval_ms holds no value.
constexpr int kDefaultPeriodicDumpIntervalMs = 60 * 60 * 1000;

}  // namespace
| |
| absl::Status StoreImpl::ConfigureCollection(const Collector::Config& config, |
| Collector::Type type) const { |
| switch (type) { |
| case Collector::Type::kSensor: |
| return all_collectors_.sensor_collector->ConfigureCollection(config); |
| default: |
| return absl::InvalidArgumentError("Unsupported collector type"); |
| } |
| } |
| |
| std::vector<std::shared_ptr<const Sensor>> StoreImpl::GetAllSensors() const { |
| return all_collectors_.sensor_collector->GetAllSensors(); |
| } |
| |
| bool StoreImpl::IsConfigKeyOwningAllSensors( |
| absl::string_view config_key) const { |
| return entity_config_->IsConfigKeyOwningAllSensors(config_key); |
| } |
| |
| std::shared_ptr<const Sensor> StoreImpl::GetSensorBySensorKey( |
| const std::string& sensor_key) const { |
| return all_collectors_.sensor_collector->GetSensorBySensorKey(sensor_key); |
| } |
| |
| std::vector<std::string> StoreImpl::GetAllSensorKeysByConfigKey( |
| const std::string& board_config_key) const { |
| return all_collectors_.sensor_collector->GetAllSensorKeysByConfigKey( |
| board_config_key); |
| } |
| |
| std::shared_ptr<const Sensor> StoreImpl::GetSensorByConfigKeyAndSensorKey( |
| const std::string& board_config_key, const std::string& sensor_key) const { |
| return all_collectors_.sensor_collector->GetSensorByConfigKeyAndSensorKey( |
| board_config_key, sensor_key); |
| } |
| |
| absl::Status StoreImpl::WriteToSensor(const std::string& sensor_key, |
| const SensorValue& value) { |
| return all_collectors_.sensor_collector->WriteToSensor(sensor_key, value); |
| } |
| |
| absl::StatusOr<const std::string&> StoreImpl::GetDevpathFromSensor( |
| std::shared_ptr<const Sensor> sensor) const { |
| // Get devpath for the sensor. |
| // 1. Get the related item key from static attributes. |
| // 2. Get topology node from related item key. |
| // 3. Get devpath from topology node. |
| const SensorAttributesStatic& static_attributes = |
| sensor->GetSensorAttributesStatic(); |
| absl::string_view related_item_key = |
| static_attributes.entity_common_config().related_item().id(); |
| // For fans, there may be several sensors with same related item id, we must |
| // get the true related item key from the parent board. |
| if (static_attributes.entity_common_config().related_item().type() == |
| RESOURCE_TYPE_FAN) { |
| ECCLESIA_ASSIGN_OR_RETURN( |
| const TopologyConfigNode* parent_board, |
| GetFruTopology( |
| static_attributes.entity_common_config().board_config_key())); |
| // The fan id should always be present in the parent board's children |
| // fans map. |
| auto fan_it = parent_board->children_fans().find(related_item_key); |
| if (fan_it == parent_board->children_fans().end()) { |
| return absl::NotFoundError(absl::Substitute( |
| "Fan $0 not found in parent board $1", related_item_key, |
| parent_board->fru_info().fru_key())); |
| } |
| related_item_key = fan_it->second; |
| } else if (static_attributes.entity_common_config().related_item().type() == |
| RESOURCE_TYPE_PROCESSOR) { |
| ECCLESIA_ASSIGN_OR_RETURN( |
| const TopologyConfigNode* parent_board, |
| GetFruTopology( |
| static_attributes.entity_common_config().board_config_key())); |
| // The processor id should always be present in the parent board's children |
| // fans map. |
| auto processor_it = |
| parent_board->children_processors().find(related_item_key); |
| if (processor_it == parent_board->children_processors().end()) { |
| return absl::NotFoundError(absl::Substitute( |
| "Processor $0 not found in parent board $1", related_item_key, |
| parent_board->fru_info().fru_key())); |
| } |
| related_item_key = processor_it->second; |
| } else if (static_attributes.entity_common_config().related_item().type() == |
| RESOURCE_TYPE_DIMM) { |
| ECCLESIA_ASSIGN_OR_RETURN( |
| const TopologyConfigNode* parent_board, |
| GetFruTopology( |
| static_attributes.entity_common_config().board_config_key())); |
| // The dimm id should always be present in the parent board's children |
| // fans map. |
| auto dimm_it = parent_board->children_dimms().find(related_item_key); |
| if (dimm_it == parent_board->children_dimms().end()) { |
| return absl::NotFoundError(absl::Substitute( |
| "DIMM $0 not found in parent board $1", related_item_key, |
| parent_board->fru_info().fru_key())); |
| } |
| related_item_key = dimm_it->second; |
| } |
| ECCLESIA_ASSIGN_OR_RETURN(const TopologyConfigNode* topology_node, |
| GetFruTopology(related_item_key)); |
| |
| return topology_node->location_context().devpath(); |
| } |
| |
| absl::StatusOr<const Fru*> StoreImpl::GetFru(absl::string_view key) const { |
| return entity_config_->GetFru(key); |
| } |
| |
| absl::StatusOr<const FruTable*> StoreImpl::GetAllFrus() const { |
| return entity_config_->GetAllFrus(); |
| } |
| |
| SoftwareMetricsValue StoreImpl::GetMetricValues() const { |
| return all_collectors_.metric_collector->GetMetricValues(); |
| } |
| |
| SocketStatStates StoreImpl::GetMetricSocketStatValues() const { |
| return all_collectors_.metric_collector->GetSocketStatMetricsValues(); |
| } |
| |
| NetFilterStates StoreImpl::GetMetricNetFilterValues() const { |
| return all_collectors_.metric_collector->GetNetFilterValues(); |
| } |
| |
| nlohmann::json StoreImpl::ToJson() const { |
| nlohmann::json json; |
| json["Sensor"] = all_collectors_.sensor_collector->ToJson(); |
| json["Fru"] = all_collectors_.fru_collector->ToJson(); |
| json["Metric"] = all_collectors_.metric_collector->ToJson(); |
| json["Gpio"] = all_collectors_.gpio_collector->ToJson(); |
| json["EntityConfig"] = entity_config_->ToJson(); |
| return json; |
| } |
| |
| nlohmann::json StoreImpl::GetSchedulerStats() const { |
| nlohmann::json json; |
| json["SensorCollector"] = |
| all_collectors_.sensor_collector->GetSchedulerStats(); |
| json["FruCollector"] = all_collectors_.fru_collector->GetSchedulerStats(); |
| json["MetricCollector"] = |
| all_collectors_.metric_collector->GetSchedulerStats(); |
| json["GpioCollector"] = all_collectors_.gpio_collector->GetSchedulerStats(); |
| json["Store"] = task_scheduler_->ToJson(); |
| return json; |
| } |
| |
| Store::Metrics StoreImpl::GetMetrics() const { return metrics_; } |
| |
| absl::StatusOr<const TopologyConfigNode*> StoreImpl::GetFruTopology( |
| absl::string_view config_key) const { |
| return entity_config_->GetFruTopologyByConfig(config_key); |
| } |
| |
| absl::StatusOr<const TopologyConfig*> StoreImpl::GetTopologyConfig() const { |
| return entity_config_->GetTopologyConfig(); |
| } |
| |
| absl::StatusOr<std::vector<std::string>> StoreImpl::GetAllConfigKeys() const { |
| return entity_config_->GetAllConfigKeys(); |
| } |
| |
| absl::StatusOr<std::string> StoreImpl::GetConfigKeyByFruKey( |
| absl::string_view fru_key) const { |
| return entity_config_->GetConfigKeyByFruKey(fru_key); |
| } |
| |
| absl::StatusOr<std::string> StoreImpl::GetFruKeyByConfigKey( |
| absl::string_view config_key) const { |
| return entity_config_->GetFruKeyByConfigKey(config_key); |
| } |
| |
| absl::StatusOr<std::vector<std::pair<std::string, std::string>>> |
| StoreImpl::GetFanInfoByConfigKey(absl::string_view config_name) const { |
| return entity_config_->GetFanInfoByConfigKey(config_name); |
| } |
| |
| absl::StatusOr<std::unique_ptr<StoreImpl>> StoreImpl::Create( |
| Options&& options) { |
| Metrics metrics_at_bootup; |
| absl::Time create_start_time = absl::Now(); |
| if (options.fru_scanners.empty()) { |
| return absl::InvalidArgumentError("FruScanners is empty"); |
| } |
| if (options.i2c_sysfs == nullptr) { |
| return absl::InvalidArgumentError("I2cSysfs is null"); |
| } |
| if (options.i3c_sysfs == nullptr) { |
| return absl::InvalidArgumentError("I3cSysfs is null"); |
| } |
| if (options.peci_sysfs == nullptr) { |
| return absl::InvalidArgumentError("PeciSysfs is null"); |
| } |
| |
| if (options.proto_parser == nullptr) { |
| return absl::InvalidArgumentError("Proto config parser is null"); |
| } |
| options.proto_parser->LoadProtoConfigs(); |
| |
| absl::Time parse_configs_start_time = absl::Now(); |
| Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Parse-Configs-Begin", |
| parse_configs_start_time); |
| |
| ECCLESIA_RETURN_IF_ERROR( |
| options.entity_config_reader->LoadEntityConfig(options.config_location)); |
| |
| std::vector<AdHocFruConfig> ad_hoc_fru_scanning_configs = |
| FruCollector::Options::ParseAdHocFruConfigs( |
| options.entity_config_reader->GetConfig()); |
| |
| absl::Time parse_configs_end_time = absl::Now(); |
| metrics_at_bootup.config_parse_duration = |
| parse_configs_end_time - parse_configs_start_time; |
| Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Parse-Configs-End", |
| parse_configs_end_time); |
| |
| size_t ad_hoc_fru_count = ad_hoc_fru_scanning_configs.size(); |
| |
| absl::flat_hash_map<AdHocScannerType, FruScanner*> fru_scanners_raw; |
| for (const auto& [type, scanner] : options.fru_scanners) { |
| fru_scanners_raw[type] = scanner.get(); |
| } |
| FruCollector::Options fru_collector_options = { |
| .fru_scanners = std::move(fru_scanners_raw), |
| .ad_hoc_fru_scanning_configs = std::move(ad_hoc_fru_scanning_configs)}; |
| |
| AllCollectors all_collectors = { |
| .sensor_collector = EmptySensorCollector::Create(), |
| .fru_collector = EmptyFruCollector::Create(), |
| .metric_collector = EmptyMetricCollector::Create(), |
| .gpio_collector = EmptyGpioCollector::Create(), |
| }; |
| std::shared_ptr<EntityConfig> entity_config_shared = |
| EmptyEntityConfigImpl::Create(); |
| if (GetTlbmcConfig().fru_collector_module().enabled()) { |
| absl::Time fru_scan_start_time = absl::Now(); |
| Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Scan-FRUs-Begin", |
| fru_scan_start_time); |
| |
| ECCLESIA_ASSIGN_OR_RETURN( |
| std::unique_ptr<FruCollector> fru_collector, |
| options.collector_factory->CreateFruCollector(fru_collector_options)); |
| |
| const RawFruTable fru_table = fru_collector->GetCopyOfCurrentScannedFrus(); |
| absl::Time fru_scan_end_time = absl::Now(); |
| metrics_at_bootup.fru_scan_and_collector_create_duration = |
| fru_scan_end_time - fru_scan_start_time; |
| Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Scan-FRUs-End", |
| fru_scan_end_time); |
| |
| // Load Configs |
| absl::Time load_configs_start_time = absl::Now(); |
| Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Load-Configs-Begin", |
| load_configs_start_time); |
| |
| absl::StatusOr<std::unique_ptr<EntityConfig>> entity_config = |
| options.entity_config_reader->CreateEntityConfig(fru_table, |
| ad_hoc_fru_count); |
| if (!entity_config.ok()) { |
| LOG(ERROR) << "Failed to create entity config: " |
| << entity_config.status(); |
| LOG(ERROR) << "The cached FRU table will be deleted."; |
| // If Store is bad, then cache may be bad too. Let cache reload on next |
| // boot. |
| std::filesystem::remove(fru_collector_options.cached_fru_table_path); |
| return entity_config.status(); |
| } |
| |
| absl::Time load_configs_end_time = absl::Now(); |
| metrics_at_bootup.topology_config_load_duration = |
| load_configs_end_time - load_configs_start_time; |
| Tracer::GetInstance().AddOneOffDatapoint("Tlbmc-Load-Configs-End", |
| load_configs_end_time); |
| |
| // EntityConfig must be shared as it is used by the FruCollector. |
| entity_config_shared = absl::ShareUniquePtr(std::move(*entity_config)); |
| |
| fru_collector->SetEntityConfig(entity_config_shared); |
| all_collectors.fru_collector = std::move(fru_collector); |
| |
| if (GetTlbmcConfig().sensor_collector_module().enabled()) { |
| // Now create all the collectors one by one. |
| absl::Time sensor_collector_create_start_time = absl::Now(); |
| Tracer::GetInstance().AddOneOffDatapoint( |
| "Tlbmc-Create-SensorCollector-Begin", |
| sensor_collector_create_start_time); |
| |
| SensorCollector::SensorConfigs sensor_configs; |
| ECCLESIA_ASSIGN_OR_RETURN( |
| sensor_configs.hwmon_temp_sensor_configs, |
| entity_config_shared->GetAllHwmonTempSensorConfigs()); |
| ECCLESIA_ASSIGN_OR_RETURN(sensor_configs.psu_sensor_configs, |
| entity_config_shared->GetAllPsuSensorConfigs()); |
| ECCLESIA_ASSIGN_OR_RETURN( |
| sensor_configs.fan_controller_configs, |
| entity_config_shared->GetAllFanControllerConfigs()); |
| ECCLESIA_ASSIGN_OR_RETURN(sensor_configs.fan_pwm_configs, |
| entity_config_shared->GetAllFanPwmConfigs()); |
| ECCLESIA_ASSIGN_OR_RETURN(sensor_configs.fan_tach_configs, |
| entity_config_shared->GetAllFanTachConfigs()); |
| ECCLESIA_ASSIGN_OR_RETURN( |
| sensor_configs.shared_mem_sensor_configs, |
| entity_config_shared->GetAllSharedMemSensorConfigs()); |
| ECCLESIA_ASSIGN_OR_RETURN( |
| sensor_configs.virtual_sensor_configs, |
| entity_config_shared->GetAllVirtualSensorConfigs()); |
| ECCLESIA_ASSIGN_OR_RETURN( |
| sensor_configs.intel_cpu_sensor_configs, |
| entity_config_shared->GetAllIntelCpuSensorConfigs()); |
| ECCLESIA_ASSIGN_OR_RETURN( |
| sensor_configs.nic_telemetry_configs, |
| entity_config_shared->GetAllNicTelemetryConfigs()); |
| |
| std::unique_ptr<PeciAccessInterface> peci_access = |
| std::make_unique<PeciAccessImpl>(); |
| |
| absl::StatusOr<std::unique_ptr<SensorCollector>> sensor_collector = |
| options.collector_factory->CreateSensorCollector( |
| {.sensor_configs = std::move(sensor_configs), |
| .i2c_sysfs = *options.i2c_sysfs, |
| .i3c_sysfs = *options.i3c_sysfs, |
| .peci_sysfs = *options.peci_sysfs, |
| .peci_access = *options.peci_access, |
| .override_sensor_sampling_interval_ms = |
| options.override_sensor_sampling_interval_ms}); |
| if (!sensor_collector.ok()) { |
| LOG(ERROR) << "Failed to create sensor collector: " |
| << sensor_collector.status(); |
| LOG(ERROR) << "The cached FRU table will be deleted."; |
| // If Store is bad, then cache may be bad too. Let cache reload on next |
| // boot. |
| std::filesystem::remove(fru_collector_options.cached_fru_table_path); |
| return sensor_collector.status(); |
| } |
| |
| absl::Time sensor_collector_create_end_time = absl::Now(); |
| metrics_at_bootup.sensor_collector_create_duration = |
| sensor_collector_create_end_time - sensor_collector_create_start_time; |
| Tracer::GetInstance().AddOneOffDatapoint( |
| "Tlbmc-Create-SensorCollector-End", sensor_collector_create_end_time); |
| all_collectors.sensor_collector = std::move(*sensor_collector); |
| entity_config_shared->SetSensorCollector( |
| all_collectors.sensor_collector.get()); |
| } |
| } |
| // Add support for other collectors here |
| |
| if (GetTlbmcConfig().metric_collector_module().enabled()) { |
| absl::StatusOr<std::unique_ptr<MetricCollector>> metric_collector; |
| MetricCollector::Params params = { |
| .metric_configs = options.proto_parser->GetSoftwareMetricsConfig()}; |
| |
| if (options.executor_command_map.empty()) { |
| metric_collector = |
| options.collector_factory->CreateMetricCollector(params); |
| } else { |
| params.executor_command_map = options.executor_command_map; |
| metric_collector = MetricCollector::CreateForUnitTest(params); |
| } |
| if (!metric_collector.ok()) { |
| LOG(ERROR) << "Failed to create metric collector: " |
| << metric_collector.status(); |
| return metric_collector.status(); |
| } |
| all_collectors.metric_collector = std::move(*metric_collector); |
| } |
| |
| if (GetTlbmcConfig().gpio_collector_module().enabled()) { |
| absl::StatusOr<std::unique_ptr<GpioCollector>> gpio_collector = |
| options.collector_factory->CreateGpioCollector( |
| {.gpio_configs = options.proto_parser->GetGpioConfigs()}); |
| if (!gpio_collector.ok()) { |
| LOG(ERROR) << "Failed to create GPIO collector: " |
| << gpio_collector.status(); |
| return gpio_collector.status(); |
| } |
| all_collectors.gpio_collector = std::move(*gpio_collector); |
| } |
| |
| // Create the store |
| int periodic_dump_interval_ms = |
| options.override_store_snapshot_interval_ms.value_or( |
| kDefaultPeriodicDumpIntervalMs); |
| metrics_at_bootup.time_to_ready = absl::Now() - create_start_time; |
| |
| // Trigger AdHocFruScanning after every collector has been initialized to |
| // prevent race conditions |
| all_collectors.fru_collector->SetUpAdHocFruScanning(); |
| |
| return absl::WrapUnique(new StoreImpl( |
| std::move(options), std::move(all_collectors), |
| std::move(entity_config_shared), |
| absl::Milliseconds(periodic_dump_interval_ms), metrics_at_bootup)); |
| } |
| |
| void StoreImpl::SetSmartRouter(RouterInterface* smart_router) { |
| entity_config_->SetSmartRouter(smart_router); |
| } |
| |
| StoreImpl::StoreImpl(Options&& options, AllCollectors all_collectors, |
| std::shared_ptr<EntityConfig> entity_config, |
| absl::Duration store_snapshot_interval, |
| Metrics metrics_at_bootup) |
| : options_(std::move(options)), |
| all_collectors_(std::move(all_collectors)), |
| entity_config_(std::move(entity_config)), |
| task_scheduler_(std::make_unique<TaskScheduler>()), |
| metrics_(metrics_at_bootup) { |
| task_scheduler_->RunAndScheduleAsync( |
| [this](absl::AnyInvocable<void()> done) { |
| // Dump the store snapshot. |
| LOG(WARNING) << "=== Store snapshot ==="; |
| LOG(WARNING) << ToJson().dump(); |
| LOG(WARNING) << "=== END ==="; |
| |
| // Dump the collector scheduler stats. |
| LOG(WARNING) << "=== Collector Scheduler stats ==="; |
| LOG(WARNING) << GetSchedulerStats().dump(); |
| LOG(WARNING) << "=== END ==="; |
| |
| // Dump the central config. |
| LOG(WARNING) << "=== Central Config ==="; |
| LOG(WARNING) << GetTlbmcConfig(); |
| LOG(WARNING) << "=== END ==="; |
| done(); |
| }, |
| store_snapshot_interval); |
| } |
| |
| // Since the task scheduler is a member variable, we need to stop it before |
| // destructing since the store is running an async task that is accessing the |
| // task scheduler. |
| StoreImpl::~StoreImpl() { task_scheduler_->Stop(); } |
| |
| void StoreImpl::StartSensorCollection() { |
| all_collectors_.sensor_collector->StartCollection(); |
| } |
| |
| } // namespace milotic_tlbmc |