Add APIs for ForceKill and ResetErrorCounters in Stellaris CPER plugin
PiperOrigin-RevId: 740962150
Change-Id: I8f6d642be1ac7bb226ad84362544997a7811d510
diff --git a/cper/plugin.cc b/cper/plugin.cc
index c73d02f..9b14545 100644
--- a/cper/plugin.cc
+++ b/cper/plugin.cc
@@ -14,6 +14,9 @@
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
#include "redfish_query_engine/http/client.h"
#include "redfish_query_engine/http/codes.h"
#include "nlohmann/json.hpp"
@@ -27,6 +30,7 @@
#include "sse_plugin/event.h"
#include "events.pb.h"
#include "sse_plugin/events_manager.h"
+#include "utils/std_thread.h"
#include "voyager/priority_queue.hpp"
namespace milotic {
@@ -35,6 +39,9 @@
constexpr int kVlogVerbosity = 1;
} // namespace
+constexpr absl::string_view kResetErrorCounters =
+ "/google/cper/reset_error_counters";  // POST path dispatched to HandleResetErrorCounters.
+constexpr absl::string_view kPostForceKillPath = "/google/cper/forcekill";  // POST path dispatched to HandlePostForceKill.
constexpr absl::string_view kGetMachineHealthPath =
"/google/v1/Chassis/stellaris/Oem/Google/Health";
constexpr absl::string_view kMachineHealthResourceName = "ots-cn-health-status";
@@ -63,6 +70,8 @@
{RedfishPlugin::RequestVerb::kPut, put_processed_events_path_},
{RedfishPlugin::RequestVerb::kSubscribe, subscribe_events_path_},
{RedfishPlugin::RequestVerb::kGet, kGetMachineHealthPath},
+ {RedfishPlugin::RequestVerb::kPost, kPostForceKillPath},
+ {RedfishPlugin::RequestVerb::kPost, kResetErrorCounters},
};
return proxy_->GetRequestAction(request_verb, request, paths_to_handle);
}
@@ -93,12 +102,86 @@
/*field_values=*/{});
}
+ if (verb == RedfishPlugin::RequestVerb::kPost &&
+ request_path == kPostForceKillPath) {
+ return milotic::CaptureResponseCode(
+ HandlePostForceKill(std::move(http_request)),
+ &CperMetrics::Get().cper_response_code, request_path,
+ /*field_values=*/{});
+ }
+
+ if (verb == RedfishPlugin::RequestVerb::kPost &&
+ request_path == kResetErrorCounters) {
+ return milotic::CaptureResponseCode(
+ HandleResetErrorCounters(std::move(http_request)),
+ &CperMetrics::Get().cper_response_code, request_path,
+ /*field_values=*/{});
+ }
+
return ProxyResponse(
HTTP_CODE_BAD_REQUEST,
absl::StrCat("Unable to handle request on uri: ", http_request->uri),
{{"Content-Type", "text/plain"}});
}
+static void KillThreadRun() {  // Thread body: logs, waits 5 seconds, then exits the process.
+ LOG(INFO) << "Kill vBMC started";
+ absl::SleepFor(absl::Seconds(5));  // NOTE(review): presumably gives the HTTP response time to go out before exit - confirm.
+ // go/totw/20 - Use quick_exit.
+ LOG(INFO) << "Killing vBMC";
+ quick_exit(0);  // Ends the whole process with status 0; never returns.
+}
+
+absl::StatusOr<milotic::ProxyResponse> CperEventsPlugin::HandlePostForceKill(
+ std::unique_ptr<milotic::ProxyRequest> request) {  // Starts the one-shot kill thread; `request` is not read.
+ milotic::ProxyResponse response(HTTP_CODE_REQUEST_OK,
+ {{"Content-Type", "text/plain"}});  // Built with HTTP_CODE_REQUEST_OK; the body is never assigned here.
+ absl::MutexLock lock(&kill_mutex_);  // Held while kill_thread_ is checked and created.
+ if (kill_thread_ == nullptr) {  // Only the first request creates the thread; later ones just return.
+ kill_thread_ =
+ std::make_unique<StdThread>("KillThread", std::move(KillThreadRun));
+ kill_thread_->Start();  // Runs KillThreadRun, which exits the process after its sleep.
+ }
+ return response;
+}
+
+absl::StatusOr<milotic::ProxyResponse>
+CperEventsPlugin::HandleResetErrorCounters(
+ std::unique_ptr<milotic::ProxyRequest> request) {  // Archives processed events, then resets storage and events managers; `request` is not read.
+ milotic::ProxyResponse response(HTTP_CODE_REQUEST_OK,
+ {{"Content-Type", "text/plain"}});  // Starts as HTTP_CODE_REQUEST_OK; failure paths below overwrite code and body.
+ absl::MutexLock lock(&reset_error_counters_mutex_);  // Held until return, so resets run one at a time.
+ if ((!events_manager_.IsServing() && !events_manager_.IsStopping())) {  // Proceed only when serving or stopping.
+ response.code = HTTP_CODE_SERVICE_UNAV;
+ response.body = absl::StrCat("vBMC is Initializing, current state: ",
+ events_manager_.ExplainPluginState());
+ return response;
+ }
+ if (!storage_manager_->SendHeartbeat().ok()) {  // A failed heartbeat is reported as "not connected to the collector".
+ response.code = HTTP_CODE_SERVICE_UNAV;
+ response.body = "vBMC is not connected to the collector";
+ return response;
+ }
+
+ // Archive events in the storage manager first and then reset.
+ LOG(INFO) << "Archiving processed events";
+ absl::Status status = storage_manager_->ArchiveProcessedEvents();
+ if (!status.ok()) {  // Archive failed: return before either Reset() call runs.
+ response.code = HTTP_CODE_SERVICE_UNAV;
+ response.body =
+ absl::StrCat("Failed to archive processed events: ", status.message());
+ return response;
+ }
+ LOG(INFO) << "Resetting storage manager";
+ storage_manager_->Reset();  // Storage manager is reset before the events manager.
+
+ LOG(INFO) << "Resetting events manager";
+ events_manager_.Reset();
+
+ response.body = "Reset completed";  // Success: code stays HTTP_CODE_REQUEST_OK.
+ return response;
+}
+
absl::StatusOr<ProxyResponse> CperEventsPlugin::HandleGetHealth(
std::unique_ptr<ProxyRequest> request) {
ProxyResponse response(HTTP_CODE_BAD_REQUEST,
diff --git a/cper/plugin.h b/cper/plugin.h
index 13ea597..db18ddc 100644
--- a/cper/plugin.h
+++ b/cper/plugin.h
@@ -8,6 +8,7 @@
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
+#include "absl/synchronization/mutex.h"
#include "cper/manager.h"
#include "proxy.h"
#include "proxy_config.pb.h"
@@ -15,6 +16,7 @@
#include "request_response.h"
#include "sse_plugin/collector_storage_manager.h"
#include "sse_plugin/storage_manager.h"
+#include "utils/thread.h"
#include "voyager/priority_queue.hpp"
namespace milotic {
@@ -61,12 +63,19 @@
std::unique_ptr<ProxyRequest> request);
absl::StatusOr<ProxyResponse> HandleFetchHealthRollup(
std::unique_ptr<ProxyRequest> request);
+ absl::StatusOr<ProxyResponse> HandlePostForceKill(
+ std::unique_ptr<ProxyRequest> request);  // Starts a one-shot thread that exits the process; see plugin.cc.
+ absl::StatusOr<ProxyResponse> HandleResetErrorCounters(
+ std::unique_ptr<ProxyRequest> request);  // Archives processed events, then resets storage and events managers; see plugin.cc.
voyager::TelemetryPriorityQueue events_queue_;
milotic::EventStorageManager* storage_manager_;
CperEventsManager events_manager_;
bool init_events_manager_;
milotic::Proxy* proxy_ = nullptr;
+ absl::Mutex kill_mutex_;  // Locked by HandlePostForceKill while it checks and creates kill_thread_.
+ std::unique_ptr<MiloticThread> kill_thread_;  // Null until the first force-kill request creates and starts it.
+ absl::Mutex reset_error_counters_mutex_;  // Locked for the duration of HandleResetErrorCounters.
std::string put_processed_events_path_;
std::string subscribe_events_path_;
std::string collector_request_id_filter_;