Add Power Fault Log Collector Module and Configs

This CL adds the PowerFaultLogCollectorModule to the tlbmc central config, including:

* Adds new proto messages for the module, config, and log entries.
* Updates proto_config_parser to load the power fault log config.
* Adds the new files to meson.build and copy.bara.sky.
* Adds the module (disabled by default) to tlbmc_config_bundle.textproto and platform JSON configs for tests.

#tlbmc_power_fault_log

PiperOrigin-RevId: 847532034
Change-Id: I4351449b11c3589bf9600ff8d9e633a2a0716ced
diff --git a/tlbmc/central_config.proto b/tlbmc/central_config.proto
index d49d2f1..1cd805d 100644
--- a/tlbmc/central_config.proto
+++ b/tlbmc/central_config.proto
@@ -122,6 +122,11 @@
   PowerControlSubmodule power_control_sub_module = 2;
 }
 
+// Controls the Power Fault Log collector module, which is off unless enabled.
+message PowerFaultLogCollectorModule {
+  bool enabled = 1 [default = false];
+}
+
 // A proto message to hold all configurations of the modules in tlbmc.
 message TlbmcConfig {
   string platform_name = 8;
@@ -133,6 +138,7 @@
   RedfishRateLimiterModule redfish_rate_limiter_module = 5;
   TrustBundleInstallModule trust_bundle_install_module = 6;
   GpioCollectorModule gpio_collector_module = 7;
+  PowerFaultLogCollectorModule power_fault_log_collector_module = 9;
 }
 
 message TlbmcConfigBundle {
diff --git a/tlbmc/collector/power_fault_log_collector.cc b/tlbmc/collector/power_fault_log_collector.cc
new file mode 100644
index 0000000..056470a
--- /dev/null
+++ b/tlbmc/collector/power_fault_log_collector.cc
@@ -0,0 +1,249 @@
+#include "tlbmc/collector/power_fault_log_collector.h"
+
+#include <algorithm>
+#include <filesystem>  // NOLINT
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <system_error>  // NOLINT
+#include <utility>
+#include <vector>
+
+#include "absl/functional/any_invocable.h"
+#include "absl/log/log.h"
+#include "absl/memory/memory.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/time.h"
+#include "nlohmann/json.hpp"
+#include "power_fault_log_config.pb.h"
+#include "power_fault_log_entry.pb.h"
+#include "tlbmc/scheduler/scheduler.h"
+#include "google/protobuf/json/json.h"
+
+namespace milotic_tlbmc {
+
+namespace fs = std::filesystem;
+absl::StatusOr<std::unique_ptr<PowerFaultLogCollector>>
+PowerFaultLogCollector::Create(const Params& params) {
+  auto scheduler_owner =
+      std::make_unique<PowerFaultLogThreadManager>(params.clock);
+  // TODO(b/464412408): This feature will be implemented in a follow up CL.
+  if (!params.config.enable_detection()) {
+    LOG(INFO) << "Power fault detection object creation is disabled.";
+  } else {
+    LOG(INFO) << "Power fault detection object creation is enabled.";
+  }
+  return absl::WrapUnique(
+      new PowerFaultLogCollector(params.config, std::move(scheduler_owner)));
+}
+
+absl::Status PowerFaultLogCollector::StartCollection() {
+  // The log root must exist and be a directory before polling is scheduled.
+  const std::string path(config_.power_fault_log_path());
+  std::error_code ec;
+  if (!fs::exists(path, ec)) {
+    if (ec) {  // The probe itself failed; report the OS-level cause.
+      return absl::InternalError(absl::StrCat(
+          "Failed to check if log path exists: ", path, ": ", ec.message()));
+    }
+    return absl::NotFoundError(absl::StrCat("Log path does not exist: ", path));
+  }
+  if (!fs::is_directory(path, ec)) {
+    if (ec) {
+      return absl::InternalError(
+          absl::StrCat("Failed to check if log path is a directory: ", path,
+                       ": ", ec.message()));
+    }
+    return absl::FailedPreconditionError(
+        absl::StrCat("Log path is not a directory: ", path));
+  }
+  thread_manager_->task_ids.push_back(
+      thread_manager_->task_scheduler->RunAndScheduleAsync(
+          [this](absl::AnyInvocable<void()> on_done) {
+            DoCollection(std::move(on_done));
+          },
+          absl::Seconds(config_.collection_interval_seconds())));
+  return absl::OkStatus();
+}
+
+PowerFaultLogCollector::PowerFaultLogCollector(
+    const PowerFaultLogConfig& config,
+    std::unique_ptr<PowerFaultLogThreadManager> thread_manager)
+    : config_{config}, thread_manager_{std::move(thread_manager)} {}
+
+PowerFaultLogCollector::~PowerFaultLogCollector() {
+  // A default-constructed collector owns no thread manager; nothing to stop.
+  if (thread_manager_ == nullptr) return;
+  thread_manager_->task_scheduler->Stop();
+}
+
+void PowerFaultLogCollector::DoCollection(absl::AnyInvocable<void()> on_done) {
+  // One pass over the log root: each subdirectory not yet in processed_dirs_
+  // is handed to ProcessNewDirectory(); `on_done` runs last when it is set.
+  const fs::path log_path(config_.power_fault_log_path());
+  std::error_code ec;
+  // increment(ec) instead of range-for: operator++ throws on a read error,
+  // which would bypass the `ec` check after the loop and skip `on_done`.
+  for (fs::directory_iterator it(log_path, ec), end; !ec && it != end;
+       it.increment(ec)) {
+    std::error_code entry_ec;
+    if (!it->is_directory(entry_ec)) {
+      if (entry_ec) {
+        LOG_EVERY_N_SEC(ERROR, absl::ToInt64Seconds(absl::Minutes(5)))
+            << "Error checking if entry " << it->path()
+            << " is a directory: " << entry_ec.message();
+      }
+      continue;  // Not a directory, skip.
+    }
+    bool already_processed = false;
+    {
+      absl::MutexLock lock(mutex_);
+      already_processed =
+          processed_dirs_.contains(it->path().filename().string());
+    }
+    if (!already_processed) ProcessNewDirectory(it->path());
+  }
+  if (ec) {  // Opening the directory or advancing the iterator failed.
+    LOG_EVERY_N_SEC(ERROR, absl::ToInt64Seconds(absl::Minutes(5)))
+        << "Error finishing directory iteration " << log_path << ": "
+        << ec.message();
+  }
+  if (on_done) on_done();
+}
+
+void PowerFaultLogCollector::ProcessNewDirectory(const fs::path& dir_path) {
+  // Records one entry (folder name plus regular file names) for `dir_path`.
+  // Folders ending in ".tmp" and folders without files return early without
+  // being added to processed_dirs_, so a later DoCollection() pass hands them
+  // to this function again.
+  std::string folder_name = dir_path.filename().string();
+  LOG(INFO) << "Processing new directory: " << folder_name;
+  if (folder_name.ends_with(".tmp")) {
+    LOG(INFO) << "Skipping current directory: " << folder_name;
+    return;
+  }
+  PowerFaultLogEntry new_entry;
+  new_entry.set_folder_name(folder_name);
+  std::error_code ec;
+  // increment(ec) instead of range-for: operator++ throws on a read error,
+  // which would bypass the `ec` handling below.
+  for (fs::directory_iterator it(dir_path, ec), end; !ec && it != end;
+       it.increment(ec)) {
+    std::error_code entry_ec;
+    if (it->is_regular_file(entry_ec)) {
+      new_entry.add_file_names(it->path().filename().string());
+    } else if (entry_ec) {
+      LOG(ERROR) << "Error checking if entry " << it->path()
+                 << " is a regular file: " << entry_ec.message();
+    }
+  }
+  if (ec) {
+    LOG(ERROR) << "Error finishing directory iteration " << dir_path << ": "
+               << ec.message();
+    // The directory could not be read. It is still marked as processed so
+    // that it is not re-scanned, and the error re-logged, on every pass.
+    absl::MutexLock lock(mutex_);
+    processed_dirs_.insert(std::move(folder_name));
+    return;
+  }
+  if (new_entry.file_names().empty()) {
+    LOG(INFO) << "No files found in directory: " << folder_name;
+    return;
+  }
+  // The directory has files: publish the entry and mark the folder as
+  // processed under the same lock.
+  absl::MutexLock lock(mutex_);
+  processed_dirs_.insert(folder_name);
+  *collected_entries_.add_entries() = std::move(new_entry);
+  LOG(INFO) << "Added entry for folder: " << folder_name;
+}
+
+PowerFaultLogEntries PowerFaultLogCollector::GetCollectedEntries() const {
+  absl::MutexLock lock(mutex_);  // Held while the return value is copied.
+  return collected_entries_;
+}
+
+absl::StatusOr<std::string> PowerFaultLogCollector::GetLogFileContent(
+    absl::string_view folder_name, absl::string_view file_name) const {
+  // Only files recorded by a collection pass may be read; this keeps callers
+  // from reading files that were never listed in the collected entries.
+  bool is_known_file = false;
+  {
+    absl::MutexLock lock(mutex_);
+    for (const auto& entry : collected_entries_.entries()) {
+      if (entry.folder_name() != folder_name) continue;
+      const auto& names = entry.file_names();
+      if (std::find(names.begin(), names.end(), file_name) != names.end()) {
+        is_known_file = true;
+        break;
+      }
+    }
+  }
+  if (!is_known_file) {
+    return absl::NotFoundError(
+        absl::StrCat("File not found in collected entries: ", folder_name,
+                     "/", file_name));
+  }
+  const fs::path file_path =
+      fs::path(config_.power_fault_log_path()) / folder_name / file_name;
+  std::error_code ec;
+  if (!fs::exists(file_path, ec) || !fs::is_regular_file(file_path, ec)) {
+    return absl::NotFoundError(
+        absl::StrCat("File not found: ", file_path.string()));
+  }
+  std::ifstream file(file_path);
+  if (!file.is_open()) {
+    return absl::InternalError(
+        absl::StrCat("Failed to open file: ", file_path.string()));
+  }
+  std::ostringstream contents;
+  contents << file.rdbuf();
+  return contents.str();
+}
+
+nlohmann::json PowerFaultLogCollector::ToJson() const {
+  // Serialized via protobuf's JSON printer, then re-parsed without exceptions.
+  std::string serialized;
+  absl::MutexLock lock(mutex_);
+  const absl::Status status = ::google::protobuf::json::MessageToJsonString(
+      collected_entries_, &serialized);
+  if (status.ok()) return nlohmann::json::parse(serialized, nullptr, false);
+  LOG(ERROR) << "Failed to convert PowerFaultLogEntries to JSON string.";
+  return nlohmann::json();
+}
+
+nlohmann::json PowerFaultLogCollector::GetSchedulerStats() const {
+  return thread_manager_->task_scheduler->ToJson();  // Scheduler's own JSON.
+}
+
+std::unique_ptr<EmptyPowerFaultLogCollector>
+EmptyPowerFaultLogCollector::Create() {
+  return std::make_unique<EmptyPowerFaultLogCollector>();  // No scheduler.
+}
+
+absl::Status EmptyPowerFaultLogCollector::StartCollection() {
+  return absl::OkStatus();  // No-op: nothing is scheduled.
+}
+
+absl::StatusOr<std::string> EmptyPowerFaultLogCollector::GetLogFileContent(
+    absl::string_view folder_name, absl::string_view file_name) const {
+  return absl::UnimplementedError(  // Both arguments are ignored.
+      "GetLogFileContent is not implemented for EmptyPowerFaultLogCollector.");
+}
+
+nlohmann::json EmptyPowerFaultLogCollector::ToJson() const {
+  return nlohmann::json::parse(  // Fixed placeholder, not collected data.
+      "{\"Warning\": \"EmptyPowerFaultLogCollector used.\"}");
+}
+
+nlohmann::json EmptyPowerFaultLogCollector::GetSchedulerStats() const {
+  return nlohmann::json::parse(  // Same placeholder as ToJson().
+      "{\"Warning\": \"EmptyPowerFaultLogCollector used.\"}");
+}
+
+}  // namespace milotic_tlbmc
diff --git a/tlbmc/collector/power_fault_log_collector.h b/tlbmc/collector/power_fault_log_collector.h
new file mode 100644
index 0000000..d9f64df
--- /dev/null
+++ b/tlbmc/collector/power_fault_log_collector.h
@@ -0,0 +1,89 @@
+#ifndef THIRD_PARTY_MILOTIC_EXTERNAL_CC_TLBMC_COLLECTOR_POWER_FAULT_LOG_COLLECTOR_H_
+#define THIRD_PARTY_MILOTIC_EXTERNAL_CC_TLBMC_COLLECTOR_POWER_FAULT_LOG_COLLECTOR_H_
+#include <filesystem>  // NOLINT
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/base/thread_annotations.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/functional/any_invocable.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
+#include "absl/synchronization/mutex.h"
+#include "time/clock.h"
+#include "nlohmann/json.hpp"
+#include "tlbmc/collector/collector.h"
+#include "power_fault_log_config.pb.h"
+#include "power_fault_log_entry.pb.h"
+#include "tlbmc/scheduler/scheduler.h"
+
+namespace milotic_tlbmc {
+
+struct PowerFaultLogThreadManager {
+  explicit PowerFaultLogThreadManager(ecclesia::Clock* clock)
+      : task_scheduler(std::make_unique<TaskScheduler>(clock)) {}
+  std::unique_ptr<TaskScheduler> task_scheduler;  // Runs the collection task.
+  std::vector<int> task_ids;  // IDs returned by RunAndScheduleAsync().
+};
+
+// Periodically scans a log root directory for power fault logs. Each
+// subdirectory is treated as one fault event; its name and the names of the
+// regular files in it are recorded in a PowerFaultLogEntries proto. File
+// contents are not parsed; they are read on demand via GetLogFileContent().
+class PowerFaultLogCollector : public Collector {
+ public:
+  struct Params {
+    ecclesia::Clock* clock = ecclesia::Clock::RealClock();
+    PowerFaultLogConfig config;
+  };
+  static absl::StatusOr<std::unique_ptr<PowerFaultLogCollector>> Create(
+      const Params& params);
+  ~PowerFaultLogCollector() override;
+  // Checks that the log path is an existing directory, then schedules scans.
+  virtual absl::Status StartCollection();
+  // Performs one scan of the log path, then invokes `on_done` if it is set.
+  virtual void DoCollection(absl::AnyInvocable<void()> on_done);
+  // Returns a copy of the collected entries.
+  virtual PowerFaultLogEntries GetCollectedEntries() const;
+  // Returns the file's content; NotFound unless the file was collected.
+  virtual absl::StatusOr<std::string> GetLogFileContent(
+      absl::string_view folder_name, absl::string_view file_name) const;
+  nlohmann::json ToJson() const override;
+  nlohmann::json GetSchedulerStats() const override;
+
+ protected:
+  PowerFaultLogCollector() = default;
+
+ private:
+  PowerFaultLogCollector(
+      const PowerFaultLogConfig& config,
+      std::unique_ptr<PowerFaultLogThreadManager> thread_manager);
+  // Lists the regular files in `dir_path` and records them as a new entry.
+  void ProcessNewDirectory(const std::filesystem::path& dir_path);
+  PowerFaultLogConfig config_;
+  std::unique_ptr<PowerFaultLogThreadManager> thread_manager_;
+  mutable absl::Mutex mutex_;
+  PowerFaultLogEntries collected_entries_ ABSL_GUARDED_BY(mutex_);
+  absl::flat_hash_set<std::string> processed_dirs_ ABSL_GUARDED_BY(mutex_);
+};
+
+class EmptyPowerFaultLogCollector final : public PowerFaultLogCollector {
+ public:
+  static std::unique_ptr<EmptyPowerFaultLogCollector> Create();
+  // No-op; always returns OK.
+  absl::Status StartCollection() override;
+  // Always returns an Unimplemented error.
+  absl::StatusOr<std::string> GetLogFileContent(
+      absl::string_view folder_name,
+      absl::string_view file_name) const override;
+  // Returns a fixed {"Warning": ...} placeholder object.
+  nlohmann::json ToJson() const override;
+  // Returns the same placeholder object as ToJson().
+  nlohmann::json GetSchedulerStats() const override;
+};
+
+}  // namespace milotic_tlbmc
+
+#endif  // THIRD_PARTY_MILOTIC_EXTERNAL_CC_TLBMC_COLLECTOR_POWER_FAULT_LOG_COLLECTOR_H_
diff --git a/tlbmc/configs/proto_config_parser.cc b/tlbmc/configs/proto_config_parser.cc
index 98b283f..d5b44df 100644
--- a/tlbmc/configs/proto_config_parser.cc
+++ b/tlbmc/configs/proto_config_parser.cc
@@ -49,6 +49,8 @@
       absl::StrCat(config_path_, "power_control_configs.textproto"));
   data_store_.thermal_configs = GetConfigFromProto<ThermalConfigs>(
       absl::StrCat(config_path_, "thermal_config.textproto"));
+  data_store_.power_fault_log_config = GetConfigFromProto<PowerFaultLogConfig>(
+      absl::StrCat(config_path_, "power_fault_log_config.textproto"));
 }
 
 SoftwareMetricsConfig ProtoConfigParser::GetSoftwareMetricsConfig() const {
@@ -67,6 +69,10 @@
   return data_store_.thermal_configs;
 }
 
+PowerFaultLogConfig ProtoConfigParser::GetPowerFaultLogConfig() const {
+  return data_store_.power_fault_log_config;  // Copy of the loaded config.
+}
+
 std::unique_ptr<ProtoConfigParser> ProtoConfigParser::Create(
     absl::string_view config_path) {
   return std::make_unique<ProtoConfigParser>(config_path);
diff --git a/tlbmc/configs/proto_config_parser.h b/tlbmc/configs/proto_config_parser.h
index a0eda6d..6597bf5 100644
--- a/tlbmc/configs/proto_config_parser.h
+++ b/tlbmc/configs/proto_config_parser.h
@@ -9,6 +9,7 @@
 #include "absl/strings/string_view.h"
 #include "gpio_config.pb.h"
 #include "power_control.pb.h"
+#include "power_fault_log_config.pb.h"
 #include "software_metrics_config.pb.h"
 #include "thermal_config.pb.h"
 
@@ -19,6 +20,7 @@
   GpioConfigs gpio_configs;
   PowerControlConfigs power_control_configs;
   ThermalConfigs thermal_configs;
+  PowerFaultLogConfig power_fault_log_config;
 };
 
 class ProtoConfigParser {
@@ -31,6 +33,7 @@
   GpioConfigs GetGpioConfigs() const;
   PowerControlConfigs GetPowerControlConfigs() const;
   ThermalConfigs GetThermalConfigs() const;
+  PowerFaultLogConfig GetPowerFaultLogConfig() const;
 
   explicit ProtoConfigParser(absl::string_view config_path)
       : config_path_(config_path) {}
diff --git a/tlbmc/meson.build b/tlbmc/meson.build
index 830de41..2881549 100644
--- a/tlbmc/meson.build
+++ b/tlbmc/meson.build
@@ -30,6 +30,8 @@
   'nic_telemetry_config.proto',
   'payload.proto',
   'power_control.proto',
+  'power_fault_log_config.proto',
+  'power_fault_log_entry.proto',
   'psu_sensor_config.proto',
   'reading_range_config.proto',
   'reading_transform_config.proto',
@@ -154,6 +156,7 @@
   'collector/gpio_collector.cc',
   'collector/metric_collector.cc',
   'collector/peci_scanner.cc',
+  'collector/power_fault_log_collector.cc',
   'collector/sensor_collector.cc',
   'collector/thermal_collector.cc',
   'configs/blocklist_parser.cc',
diff --git a/tlbmc/power_fault_log_config.proto b/tlbmc/power_fault_log_config.proto
new file mode 100644
index 0000000..a818629
--- /dev/null
+++ b/tlbmc/power_fault_log_config.proto
@@ -0,0 +1,12 @@
+edition = "2023";
+
+package milotic_tlbmc;
+
+message PowerFaultLogConfig {
+  // Interval in seconds between scans of the power fault log directory.
+  int32 collection_interval_seconds = 1;
+  // Root directory; each subdirectory in it is treated as one fault event.
+  string power_fault_log_path = 2;
+  // Whether to enable power fault detection (not implemented yet; only logged).
+  bool enable_detection = 3;
+}
diff --git a/tlbmc/power_fault_log_entry.proto b/tlbmc/power_fault_log_entry.proto
new file mode 100644
index 0000000..448aa7b
--- /dev/null
+++ b/tlbmc/power_fault_log_entry.proto
@@ -0,0 +1,12 @@
+edition = "2023";
+
+package milotic_tlbmc;
+
+message PowerFaultLogEntry {
+  string folder_name = 1;  // Name of the fault event's directory.
+  repeated string file_names = 2;  // Regular files found in that directory.
+}
+
+message PowerFaultLogEntries {
+  repeated PowerFaultLogEntry entries = 1;  // One per collected directory.
+}