// blob: 6f77a5bebb1f7accc2f4859c630d88e54f767490 [file] [log] [blame]
#ifndef THIRD_PARTY_MILOTIC_INTERNAL_CC_PROXY_CPER_MANAGER_H_
#define THIRD_PARTY_MILOTIC_INTERNAL_CC_PROXY_CPER_MANAGER_H_
#include <fcntl.h>
#include <stdbool.h>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include "absl/base/thread_annotations.h"
#include "absl/container/flat_hash_map.h"
#include "absl/log/check.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "absl/synchronization/mutex.h"
#include "absl/synchronization/notification.h"
#include "absl/time/time.h"
#include "nlohmann/json_fwd.hpp"
#include "proxy.h"
#include "proxy_config.pb.h"
#include "sse_plugin/event.h"
#include "sse_plugin/events_manager.h"
#include "sse_plugin/storage_manager.h"
#include "vendor_events.pb.h"
#include "utils/thread.h"
#include "voyager/priority_queue.hpp"
#include "utils/clock.h"
namespace milotic {
// Machine state paired with CperEventsManager. On top of the BaseMachineState
// interface it keeps memory and CPU error counters (readable through
// GetMemoryErrorCount / GetCpuErrorCount) and declares the helpers used to
// save them to a counters file.
// NOTE(review): the implementation is not visible from this header; remarks
// below marked "presumably" are unverified and should be checked against it.
class CperMachineState : public BaseMachineState {
 public:
  // Configured from the CounterFile section of the CperEvents plugin config.
  explicit CperMachineState(
      const milotic_grpc_proxy::Plugin::CperEvents::CounterFile& config);
  ~CperMachineState() override;

  // BaseMachineState overrides.
  bool UpdateStatus(Event& event, bool is_replay) override;
  // Used to update the health and status of the machine for internal events.
  void UpdateInternalEvent(absl::string_view event_message,
                           EventSeverity severity) override;

  // Error counter accessors for `origin`. Callers must not hold `mutex_`.
  int GetMemoryErrorCount(absl::string_view origin) const
      ABSL_LOCKS_EXCLUDED(mutex_);
  int GetCpuErrorCount(absl::string_view origin) const
      ABSL_LOCKS_EXCLUDED(mutex_);

 private:
  // State stored in `counter_state_`.
  // NOTE(review): presumably kPending = counters not yet written out,
  // kSaved = counters file up to date, kShutdown = stop requested; confirm.
  enum class CounterState : std::uint8_t {
    kPending = 0,
    kSaved = 1,
    kShutdown = 2,
  };

  // Counter bookkeeping; callers must not hold `mutex_`.
  absl::Status UpdateCounters(const Event::CperEventData& cper_event_data)
      ABSL_LOCKS_EXCLUDED(mutex_);
  absl::StatusOr<nlohmann::json> GetCountersFileContents()
      ABSL_LOCKS_EXCLUDED(mutex_);

  // Counters file persistence helpers.
  // NOTE(review): SaveCountersToFileThread is presumably the body run by
  // `save_counters_thread_`; confirm in the implementation.
  absl::Status SaveToCountersFile(const nlohmann::json& contents);
  void SaveCountersToFileThread();
  bool InterruptibleSleep(absl::Duration duration);

  // Counters file locations and the field names used for each counter kind.
  std::string counter_file_path_;
  std::string counter_file_temp_path_;
  std::string memory_error_field_name_;
  std::string cpu_error_field_name_;
  // Defaults to one second.
  absl::Duration save_retry_interval_ = absl::Seconds(1);

  // Guards the counter state and both counter maps. Mutable so the const
  // accessors can lock it.
  mutable absl::Mutex mutex_;
  CounterState counter_state_ ABSL_GUARDED_BY(mutex_) = CounterState::kPending;
  absl::flat_hash_map<std::string, int> memory_error_counters_
      ABSL_GUARDED_BY(mutex_);
  absl::flat_hash_map<std::string, int> cpu_error_counters_
      ABSL_GUARDED_BY(mutex_);

  // Kept as the last data member, so it is destroyed before the members above.
  std::unique_ptr<MiloticThread> save_counters_thread_;
};
// Events manager for Eiger machines.
//
// Usage notes:
//  * MainThreadRun has to be called in a new thread.
//  * A StorageManager is required for event persistence, and a
//    TelemetryPriorityQueue is required for processing events from RMC.
//  * The config values `health_rollup_fields` and `redfish_logs_fields` should
//    be valid json pointer strings; a malformed json pointer will cause a
//    crash on startup.
class CperEventsManager : public BaseEventsManager {
 public:
  // Forwards the events manager config, storage manager, queue and clock to
  // BaseEventsManager, together with a CperMachineState built from
  // `config.counter_file()`. The remapping timeout and retry delay keep their
  // defaults unless the matching config value is strictly positive.
  CperEventsManager(const milotic_grpc_proxy::Plugin::CperEvents& config,
                    std::unique_ptr<EventStorageManager> storage_manager,
                    voyager::TelemetryPriorityQueue* queue,
                    util::Clock* clock = util::Clock::RealClock())
      : BaseEventsManager(
            config.events_manager(), std::move(storage_manager), queue,
            std::make_unique<CperMachineState>(config.counter_file()), clock) {
    sse_accept_header_ = "text/event-stream";
    if (const auto ready_timeout_sec = config.remapping_ready_timeout_sec();
        ready_timeout_sec > 0) {
      remapping_ready_timeout_ = absl::Seconds(ready_timeout_sec);
    }
    if (const auto retry_delay_ms = config.remapping_retry_delay_ms();
        retry_delay_ms > 0) {
      remapping_retry_delay_ = absl::Milliseconds(retry_delay_ms);
    }
  }
  ~CperEventsManager() override;

  void SetProxy(Proxy* proxy) override;

 private:
  // BaseEventsManager overrides.
  absl::Status ProcessInitExpiry(voyager::Job& job) override;
  absl::Status ProcessEvent(voyager::Job& job) override;
  absl::StatusOr<std::vector<Event>> ParseSSEvent(
      absl::string_view sse_json, absl::string_view sse_id) override;
  void PullEventsFromRMC() override;

  // Memory origin remapping helpers.
  // NOTE(review): presumably these populate and apply
  // `memory_origin_remapping_`; confirm against the implementation.
  void GetSystemsForRemapping(absl::Duration delay = absl::ZeroDuration());
  absl::Status InitRemapping(const nlohmann::json& systems);
  void GetRemappingForMemory(nlohmann::json memory);
  void RemapMemoryOrigin(Event::CperEventData& cper_event_data) const;

  // Defaults; the constructor overrides each one when the corresponding
  // config value is greater than zero.
  absl::Duration remapping_ready_timeout_ = absl::Seconds(10);
  absl::Duration remapping_retry_delay_ = absl::Seconds(1);

  absl::Notification shutdown_;
  absl::Notification remapping_ready_;
  absl::flat_hash_map<std::string, std::string> memory_origin_remapping_;
};
} // namespace milotic
#endif // THIRD_PARTY_MILOTIC_INTERNAL_CC_PROXY_CPER_MANAGER_H_