#s4v2 - Add request validation checks to action context

This change runs request validation when starting a new action or resuming from saved state.

When resuming, validity may not be determinable until the local boot counter becomes available, so if the validation check fails transiently (Unavailable), loading of that saved action is deferred and retried after a configurable interval.

PiperOrigin-RevId: 841973504
Change-Id: Ia48e8f431f915127661d09890626a0f9fe62b540
diff --git a/action_context.cc b/action_context.cc
index 6cdb554..ca50fdf 100644
--- a/action_context.cc
+++ b/action_context.cc
@@ -1,5 +1,6 @@
 #include "action_context.h"
 
+#include <algorithm>
 #include <functional>
 #include <memory>
 #include <optional>
@@ -7,6 +8,7 @@
 #include <utility>
 
 #include "google/protobuf/timestamp.pb.h"
+#include "action_validation.h"
 #include "condition.h"
 #include "convert_proto.h"
 #include "daemon_context.h"
@@ -21,7 +23,6 @@
 #include "absl/log/log.h"
 #include "absl/status/status.h"
 #include "absl/strings/str_cat.h"
-#include "absl/strings/str_format.h"
 #include "absl/strings/string_view.h"
 #include "absl/synchronization/mutex.h"
 #include "absl/time/time.h"
@@ -35,6 +36,8 @@
 using ::safepower_agent_proto::ActionStateChange;
 using ::safepower_agent_proto::ActionStateLog;
 
+static constexpr absl::Duration kMinRetryInterval = absl::Milliseconds(100);  // Floor for the reload retry delay.
+
 ActionStateLog ActionContext::NewInitialState() {
   ActionStateLog initial_state;
   initial_state.set_epoch_ms(DaemonContext::Get().epoch_ms());
@@ -426,6 +429,70 @@
   return action_context;
 }
 
+absl::Status ActionContextManager::ReloadAndInsertAction(
+    safepower_agent_persistence_proto::SavedActionRecord saved_action_record) {
+  absl::Status validation_result = absl::OkStatus();  // Stays OK for final-state actions.
+  if (!ActionContext::IsFinalState(
+          saved_action_record.actions().action_state_log().current_state())) {
+    validation_result = ValidateRequest(
+        saved_action_record.actions().original_request(),
+        GetStartTime(saved_action_record.actions().action_state_log()));
+  }
+  if (absl::IsUnavailable(validation_result)) {
+    // Unavailable means validation lacked the data to reach a decision, so the
+    // reload of this record is rescheduled instead of being dropped.
+    LOG(ERROR) << "Failed to reload action; will retry: " << validation_result;
+    auto& daemon_context = DaemonContext::Get();
+    auto retry_interval =
+        std::max(absl::Milliseconds(daemon_context.config()
+                                        .condition_validation_options()
+                                        .validation_retry_interval_ms()),
+                 kMinRetryInterval);  // Never retry faster than kMinRetryInterval.
+    std::string task_name =
+        absl::StrCat("ReloadAndInsertAction", saved_action_record.actions()
+                                                  .original_request()
+                                                  .flight_record()
+                                                  .DebugString());
+    absl::Status delay_status = daemon_context.scheduler().DelayCall(
+        [this, saved_action_record = std::move(saved_action_record)]() mutable {
+          absl::MutexLock lock(actions_mutex_);  // Held while the reload is retried.
+          absl::Status status =
+              ReloadAndInsertAction(std::move(saved_action_record));
+          if (!status.ok()) {
+            LOG(ERROR) << "Failed to reload action: " << status;
+          }
+        },
+        retry_interval, task_name);
+    if (!delay_status.ok()) {
+      LOG(DFATAL) << "Failed to delay reload action: " << delay_status;
+      return delay_status;
+    }
+    // Retry scheduled; returning OK lets the caller try the next saved action.
+    return absl::OkStatus();
+  }
+  if (!validation_result.ok()) {
+    LOG(ERROR) << "Request validation failed: " << validation_result;
+    // Not retried: this action is dropped; the caller tries the next one.
+    return absl::OkStatus();
+  }
+  const FlightRecordRequest& flight_record =
+      saved_action_record.actions().original_request().flight_record();
+  absl::StatusOr<std::unique_ptr<ActionContext>> action_context_or =
+      ReloadActionContext(saved_action_record.actions());
+  if (!action_context_or.ok()) {
+    LOG(ERROR) << "Failed to reload action: " << action_context_or.status();
+    // Reload failed: this action is dropped; the caller tries the next one.
+    return absl::OkStatus();
+  }
+  auto [it, inserted] = running_actions_.try_emplace(
+      flight_record, *std::move(action_context_or));
+  if (!inserted) {
+    return absl::AlreadyExistsError(
+        absl::StrCat("Duplicate action ID: ", flight_record));
+  }
+  return absl::OkStatus();
+}
+
 absl::Status ActionContextManager::LoadSavedActions() {
   ASSIGN_OR_RETURN(
       SavedActions saved_actions,
@@ -439,22 +506,7 @@
                   << saved_action_record.DebugString();
       continue;
     }
-    absl::StatusOr<std::unique_ptr<ActionContext>> action_context_or =
-        ReloadActionContext(saved_action_record.actions());
-    if (!action_context_or.ok()) {
-      LOG(ERROR) << "Failed to reload action: " << action_context_or.status();
-      continue;
-    }
-    const FlightRecordRequest& flight_record =
-        saved_action_record.actions().original_request().flight_record();
-    std::unique_ptr<ActionContext> action_context =
-        *std::move(action_context_or);
-    auto [it, inserted] =
-        running_actions_.try_emplace(flight_record, std::move(action_context));
-    if (!inserted) {
-      return absl::AlreadyExistsError(absl::StrCat(
-          "Duplicate action ID: ", action_context->flight_record()));
-    }
+    RETURN_IF_ERROR(ReloadAndInsertAction(saved_action_record));
   }
   return absl::OkStatus();
 }
@@ -489,11 +541,25 @@
   return absl::OkStatus();
 }
 
+absl::Status ActionContextManager::ValidateRequest(
+    const safepower_agent_proto::StartActionRequest& request,
+    absl::Time start_time) {  // start_time is forwarded unchanged to the free function.
+  RETURN_IF_ERROR(validateFlightRecord(request.flight_record()));
+  auto& daemon_context = DaemonContext::Get();
+  auto& offline_node_entities = daemon_context.offline_node_entities();
+  if (offline_node_entities.entity_tag().empty()) {  // No local node tag to validate against.
+    return absl::FailedPreconditionError("No offline node entities found");
+  }
+  return ::safepower_agent::ValidateRequest(  // Namespace-scope overload, not this method.
+      request, offline_node_entities.entity_tag(), state_updater_->state(),
+      start_time, daemon_context.config().condition_validation_options());
+}
+
 absl::StatusOr<ActionContext* > ActionContextManager::StartAction(
     safepower_agent_proto::StartActionRequest request) {
   // Make sure this is never destroyed with the lock held.
   std::unique_ptr<ActionContext> action_context;
-  RETURN_IF_ERROR(validateFlightRecord(request.flight_record()));
+  RETURN_IF_ERROR(ValidateRequest(request, DaemonContext::Get().now()));
   FlightRecordRequest flight_record = request.flight_record();
   absl::MutexLock lock(actions_mutex_);
   ASSIGN_OR_RETURN(ActionContext::Action action_impl,
diff --git a/action_context.h b/action_context.h
index 41a49af..06fa51a 100644
--- a/action_context.h
+++ b/action_context.h
@@ -22,8 +22,8 @@
 #include "absl/status/status.h"
 #include "absl/status/statusor.h"
 #include "absl/strings/str_cat.h"
-#include "absl/strings/string_view.h"
 #include "absl/synchronization/mutex.h"
+#include "absl/time/time.h"
 
 namespace safepower_agent {
 class ActionContext;
@@ -69,8 +69,7 @@
 
   ActionContext*  GetActionContext(
       const borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest&
-          flight_record)
-      ABSL_LOCKS_EXCLUDED(actions_mutex_);
+          flight_record) ABSL_LOCKS_EXCLUDED(actions_mutex_);
 
   void GetSupportedActions(
       safepower_agent_proto::GetSupportedActionsResponse& response) const
@@ -93,9 +92,14 @@
   const absl::Mutex& actions_mutex() const { return actions_mutex_; }
 
   absl::Status validateFlightRecord(
-    const borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest& record);
+      const borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest&
+          record);
 
  private:
+  absl::Status ValidateRequest(
+      const safepower_agent_proto::StartActionRequest& request,
+      absl::Time start_time);
+
   absl::StatusOr<Action> ReserveAction(
       const safepower_agent_proto::Action& action)
       ABSL_EXCLUSIVE_LOCKS_REQUIRED(actions_mutex_);
@@ -105,6 +109,10 @@
       safepower_agent_persistence_proto::SavedAction saved_action)
       ABSL_EXCLUSIVE_LOCKS_REQUIRED(actions_mutex_);
 
+  absl::Status ReloadAndInsertAction(
+      safepower_agent_persistence_proto::SavedActionRecord saved_action_record)
+      ABSL_EXCLUSIVE_LOCKS_REQUIRED(actions_mutex_);
+
   std::shared_ptr<StateUpdater<safepower_agent_proto::SystemState>>
       state_updater_;
   std::shared_ptr<StateUpdater<safepower_agent_proto::SystemState>>
@@ -119,16 +127,16 @@
   struct FlightRecordHash {
     std::size_t operator()(
         const borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest&
-          flight_record) const {
-      return absl::HashOf(std::make_pair(flight_record.flight_name(),
-                                         flight_record.step_id()));
+            flight_record) const {
+      return absl::HashOf(
+          std::make_pair(flight_record.flight_name(), flight_record.step_id()));
     }
   };
   struct FlightRecordEq {
     bool operator()(
-      const borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest& lhs,
-      const borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest& rhs)
-         const {
+        const borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest& lhs,
+        const borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest& rhs)
+        const {
       return lhs.flight_name() == rhs.flight_name() &&
              lhs.step_id() == rhs.step_id();
     }
@@ -136,8 +144,7 @@
 
   absl::flat_hash_map<
       borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest,
-                      std::unique_ptr<ActionContext>, FlightRecordHash,
-                      FlightRecordEq>
+      std::unique_ptr<ActionContext>, FlightRecordHash, FlightRecordEq>
       running_actions_ ABSL_GUARDED_BY(actions_mutex_);
 };
 
@@ -156,17 +163,17 @@
   using Action = ActionContextManager::Action;
 
   explicit ActionContext(
-    CreationToken token, ActionContextManager& manager,
-    safepower_agent_proto::StartActionRequest request,
-    Action action_impl = {},
-    safepower_agent_proto::ActionStateLog initial_state = NewInitialState());
+      CreationToken token, ActionContextManager& manager,
+      safepower_agent_proto::StartActionRequest request,
+      Action action_impl = {},
+      safepower_agent_proto::ActionStateLog initial_state = NewInitialState());
   ~ActionContext();
 
   ActionContext(const ActionContext& other) = delete;
   ActionContext& operator=(const ActionContext& other) = delete;
 
   const borg_mgmt::node_proxy::safepower::utils::FlightRecordRequest&
-      flight_record() const {
+  flight_record() const {
     return request_.flight_record();
   }
   const safepower_agent_proto::StartActionRequest& request() const {
@@ -217,8 +224,8 @@
     // The execution task name must be unique for each action.
     // As the flight name and step can be any characters, we add a hash to
     // The execution task name to ensure that the name is unique.
-     uint64_t hash = absl::HashOf(std::make_pair(flight_record().flight_name(),
-                                 flight_record().step_id()));
+    uint64_t hash = absl::HashOf(std::make_pair(flight_record().flight_name(),
+                                                flight_record().step_id()));
     return absl::StrCat(flight_record().flight_name(), "-",
                         flight_record().step_id(), "-", hash, ".execution");
   }
diff --git a/action_validation.cc b/action_validation.cc
new file mode 100644
index 0000000..30d99dc
--- /dev/null
+++ b/action_validation.cc
@@ -0,0 +1,206 @@
+#include "action_validation.h"
+
+#include <array>
+#include <cstdint>
+
+#include "condition.h"
+#include "safepower_agent.pb.h"
+#include "safepower_agent_config.pb.h"
+#include "absl/log/check.h"
+#include "absl/log/log.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/time.h"
+#include "google/protobuf/repeated_ptr_field.h"
+#include "bmc/status_macros.h"
+
+namespace safepower_agent {
+
+static absl::Status ValidateBootCountCondition(
+    const safepower_agent_proto::Condition& precondition,
+    absl::string_view node_entity_tag,
+    safepower_agent_proto::SystemState system_state, absl::Time start_time,
+    int max_boots) {  // system_state is a by-value copy because it is mutated below.
+  auto node_state = system_state.mutable_node_state()->find(node_entity_tag);
+  if (node_state == system_state.mutable_node_state()->end()) {
+    return absl::UnavailableError(absl::StrFormat(
+        "Node %s not found in initial system state", node_entity_tag));
+  }
+  safepower_agent_proto::BootState* boot_state =
+      node_state->second.mutable_boot_state();
+  if (!boot_state->has_boot_counter()) {  // Counter may not be available yet.
+    return absl::UnavailableError(absl::StrFormat(
+        "Node %s does not have a boot counter", node_entity_tag));
+  }
+  int current_boot_count = boot_state->boot_counter();
+  for (int boot = 0; boot < max_boots; boot++) {  // Try counters current .. current + max_boots - 1.
+    boot_state->set_boot_counter(current_boot_count + boot);
+    auto [status, matches] =
+        Condition::Matches(precondition, system_state, start_time, start_time);
+    if (!matches.empty()) {  // One matching counter value is enough.
+      return absl::OkStatus();
+    }
+    if (!status.ok()) {  // Checked after matches, so a match wins over an error.
+      return status;
+    }
+  }
+  return absl::InvalidArgumentError(absl::StrFormat(
+      "Boot count precondition not met before boot counter = %d",
+      current_boot_count + max_boots));
+}
+
+struct NodeChecksFound {  // Which nodes the state conditions in a condition tree refer to.
+  bool target_node = false;  // A state condition names the queried node tag.
+  bool other_node = false;  // A state condition names a different node tag.
+};
+
+static absl::StatusOr<NodeChecksFound> IsAnyNodeChecked(
+    const google::protobuf::RepeatedPtrField<safepower_agent_proto::Condition>& condition,
+    absl::string_view node_entity_tag);
+
+static absl::StatusOr<NodeChecksFound> IsNodeChecked(
+    const safepower_agent_proto::Condition& condition,
+    absl::string_view node_entity_tag) {  // Classifies state checks in `condition` by node.
+  switch (condition.condition_type_case()) {
+    case safepower_agent_proto::Condition::kStateCondition:
+      if (condition.state_condition().node_entity_tag() == node_entity_tag) {
+        return NodeChecksFound{.target_node = true};
+      }
+      return NodeChecksFound{.other_node = true};
+    case safepower_agent_proto::Condition::kAnyOf:
+      return IsAnyNodeChecked(condition.any_of().conditions(), node_entity_tag);
+    case safepower_agent_proto::Condition::kAllOf:  // Traversed the same way as any_of.
+      return IsAnyNodeChecked(condition.all_of().conditions(), node_entity_tag);
+    default:  // Any other condition type contributes no node checks.
+      return NodeChecksFound{};
+  }
+}
+
+static absl::StatusOr<NodeChecksFound> IsAnyNodeChecked(
+    const google::protobuf::RepeatedPtrField<safepower_agent_proto::Condition>& condition,
+    absl::string_view node_entity_tag) {  // ORs the per-sub-condition results together.
+  NodeChecksFound result{};
+  for (const auto& sub_condition : condition) {
+    ASSIGN_OR_RETURN(auto found, IsNodeChecked(sub_condition, node_entity_tag));
+    result.target_node |= found.target_node;
+    result.other_node |= found.other_node;
+    if (result.target_node && result.other_node) {  // Both set; more scanning cannot change it.
+      break;
+    }
+  }
+  return result;
+}
+
+enum class TimeoutType : uint8_t {  // Result of HasTimeout() for a condition tree.
+  kNone,  // No timeout found.
+  kMatch,  // For a leaf: timeout condition whose abort() is false.
+  kAbort,  // For a leaf: timeout condition whose abort() is true.
+};
+
+static TimeoutType HasTimeout(
+    const safepower_agent_proto::Condition& condition) {  // Recursive over any_of / all_of.
+  switch (condition.condition_type_case()) {
+    case safepower_agent_proto::Condition::kTimeout:
+      return condition.timeout().abort() ? TimeoutType::kAbort
+                                         : TimeoutType::kMatch;
+    case safepower_agent_proto::Condition::kAnyOf: {  // kAbort if any child aborts, else kMatch if any child is kMatch.
+      TimeoutType timeout_type = TimeoutType::kNone;
+      for (const auto& sub_condition : condition.any_of().conditions()) {
+        TimeoutType sub_type = HasTimeout(sub_condition);
+        if (sub_type == TimeoutType::kAbort) {
+          return TimeoutType::kAbort;
+        }
+        if (sub_type == TimeoutType::kMatch) {
+          timeout_type = TimeoutType::kMatch;
+        }
+      }
+      return timeout_type;
+    }
+    case safepower_agent_proto::Condition::kAllOf: {  // kAbort if any child aborts, else kMatch only if no child is kNone.
+      TimeoutType timeout_type = TimeoutType::kMatch;  // So an empty all_of yields kMatch.
+      for (const auto& sub_condition : condition.all_of().conditions()) {
+        TimeoutType sub_type = HasTimeout(sub_condition);
+        if (sub_type == TimeoutType::kAbort) {
+          return TimeoutType::kAbort;
+        }
+        if (sub_type == TimeoutType::kNone) {
+          timeout_type = TimeoutType::kNone;
+        }
+      }
+      return timeout_type;
+    }
+    default:  // Non-timeout leaves (and unset conditions) carry no timeout.
+      return TimeoutType::kNone;
+  }
+}
+
+static absl::Status ValidateTimeout(
+    const safepower_agent_proto::Condition& condition, absl::Time start_time,
+    absl::Duration max_timeout) {
+  if (HasTimeout(condition) == TimeoutType::kNone) {  // A timeout is mandatory.
+    return absl::InvalidArgumentError("Timeout condition not found");
+  }
+  auto [status, matches] =  // Evaluated with a default-constructed state argument.
+      Condition::Matches(condition, {}, start_time, start_time + max_timeout);
+  if (!matches.empty()) {  // A match takes precedence over a non-OK status.
+    return absl::OkStatus();
+  }
+  if (!status.ok()) {
+    return status;
+  }
+  return absl::InvalidArgumentError(  // NOTE(review): text says "precondition", but this also runs on request.validation().
+      absl::StrFormat("Timeout precondition not met before timeout = %v",
+                      start_time + max_timeout));
+}
+
+absl::Status ValidateRequest(
+    const safepower_agent_proto::StartActionRequest& request,
+    absl::string_view node_entity_tag,
+    const safepower_agent_proto::SystemState& initial_system_state,
+    absl::Time start_time,
+    const safepower_agent_config::ConditionValidationOptions& options) {
+  LOG(INFO) << "Validating request: " << request.DebugString();
+  // Boot count checks must exist if there is a precondition, and must check the
+  // _actuating_ node. This prevents reboot loops and unintentional reboots
+  // post-action. NOTE(review): runs even if has_precondition() is false; confirm.
+  RETURN_IF_ERROR(ValidateBootCountCondition(
+      request.precondition(), node_entity_tag, initial_system_state, start_time,
+      options.max_boots()));
+  // Timeout checks must exist if there is any precondition.
+  absl::Duration max_timeout = absl::Seconds(options.max_timeout_seconds());
+  if (request.has_precondition()) {
+    RETURN_IF_ERROR(
+        ValidateTimeout(request.precondition(), start_time, max_timeout));
+  }
+  // A validation condition is optional, but if present it must have a timeout.
+  if (!request.has_validation()) {
+    return absl::OkStatus();
+  }
+  RETURN_IF_ERROR(
+      ValidateTimeout(request.validation(), start_time, max_timeout));
+  if (request.action().target_component().node_entity_tag().empty()) {
+    LOG(WARNING)
+        << "No target node entity tag; skipping further validation checks.";
+    return absl::OkStatus();
+  }
+  // If there are any validation checks for node state, at least one must check
+  // the node that was actuated. This catches the case where the action and
+  // validation checks are on different nodes.
+  ASSIGN_OR_RETURN(
+      auto found,
+      IsNodeChecked(request.validation(),
+                    request.action().target_component().node_entity_tag()));
+  if (!found.target_node) {
+    if (found.other_node) {  // Only other nodes are checked: reject the request.
+      return absl::NotFoundError(absl::StrFormat(
+          "Check for node %s not found in validation",
+          request.action().target_component().node_entity_tag()));
+    }
+    LOG(WARNING) << "Validation does not check any node state.";
+  }
+  return absl::OkStatus();
+}
+
+}  // namespace safepower_agent
diff --git a/action_validation.h b/action_validation.h
new file mode 100644
index 0000000..df2e5c5
--- /dev/null
+++ b/action_validation.h
@@ -0,0 +1,21 @@
+#ifndef PRODUCTION_BORG_MGMT_NODE_PROXY_SAFEPOWER_SAFEPOWER_AGENT_ACTION_VALIDATION_H_
+#define PRODUCTION_BORG_MGMT_NODE_PROXY_SAFEPOWER_SAFEPOWER_AGENT_ACTION_VALIDATION_H_
+
+#include "safepower_agent.pb.h"
+#include "safepower_agent_config.pb.h"
+#include "absl/status/status.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/time.h"
+namespace safepower_agent {
+
+absl::Status ValidateRequest(  // Checks the precondition/validation conditions of `request`.
+    const safepower_agent_proto::StartActionRequest& request,
+    absl::string_view node_entity_tag,  // Node whose boot counter is looked up.
+    const safepower_agent_proto::SystemState& initial_system_state,
+    absl::Time start_time,  // Base time for the timeout checks.
+    const safepower_agent_config::ConditionValidationOptions& options =  // Supplies max_boots and max_timeout_seconds.
+        safepower_agent_config::ConditionValidationOptions::default_instance());
+
+}  // namespace safepower_agent
+
+#endif  // PRODUCTION_BORG_MGMT_NODE_PROXY_SAFEPOWER_SAFEPOWER_AGENT_ACTION_VALIDATION_H_
diff --git a/bmc/register_actions_bmc_test.cc b/bmc/register_actions_bmc_test.cc
index 9b98e46..9a403f2 100644
--- a/bmc/register_actions_bmc_test.cc
+++ b/bmc/register_actions_bmc_test.cc
@@ -20,6 +20,7 @@
 #include "state_updater.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include "absl/log/check.h"
 #include "absl/log/log.h"
 #include "absl/status/status.h"
 #include "absl/strings/str_cat.h"
@@ -114,6 +115,34 @@
   return tmpl;
 }
 
+static void SetDefaultOfflineNodeEntities() {  // Installs a single "host" entity on DaemonContextBMC.
+  auto offline_node_entities = ParseTextProto<
+      production_msv::node_entities_proto::OfflineNodeEntityInformation>(
+      R"pb(
+        entity_tag: "host"
+        resolved_config {
+          machine_name: "test"
+          entities {
+            key: "host"
+            value { tag: "host" hostname: "test" prod_reachable: true }
+          }
+        }
+      )pb");
+  ASSERT_OK(offline_node_entities);  // A fatal gtest assertion returns from this helper only.
+  DaemonContextBMC::Get().set_offline_node_entities(*offline_node_entities);
+}
+
+static safepower_agent_proto::SystemState InitialSystemState() {  // State with node "host" at boot_counter 1.
+  auto initial_state = ParseTextProto<safepower_agent_proto::SystemState>(R"pb(
+    node_state {
+      key: "host"
+      value { boot_state { boot_counter: 1 } }
+    }
+  )pb");
+  CHECK_OK(initial_state);  // Parsing the literal above is expected to always succeed.
+  return *initial_state;
+}
+
 TEST(RegisterActionsTest, RegisteredDisruptiveActionCausesDisruption) {
   if (isASanEnabled()) {
     LOG(ERROR) << "b/349393089 absl::map insert fails asan" << std::endl;
@@ -125,6 +154,7 @@
   ASSERT_OK_AND_ASSIGN(std::string persistence_dir, CreatePersistenceDir());
   LOG(INFO) << "Created temp dir: " << persistence_dir;
   DaemonContextBMC daemon_context;
+  SetDefaultOfflineNodeEntities();
   ASSERT_OK_AND_ASSIGN(
       auto safepower_agent_config,
       ParseTextProto<safepower_agent_config::SafePowerAgentConfig>(R"pb(
@@ -147,7 +177,7 @@
       persistence_dir);
   daemon_context.set_config(safepower_agent_config);
 
-  safepower_agent_proto::SystemState initialState;
+  safepower_agent_proto::SystemState initialState = InitialSystemState();
   auto reactor =
       std::make_shared<StateUpdater<safepower_agent_proto::SystemState>>(
           std::move(initialState), true);
@@ -176,12 +206,13 @@
   ASSERT_OK_AND_ASSIGN(std::string persistence_dir, CreatePersistenceDir());
   LOG(INFO) << "Created temp dir: " << persistence_dir;
   DaemonContextBMC daemon_context;
+  SetDefaultOfflineNodeEntities();
   safepower_agent_config::SafePowerAgentConfig safepower_agent_config;
   safepower_agent_config.mutable_persistent_storage_config()->set_dir_path(
       persistence_dir);
   daemon_context.set_config(safepower_agent_config);
 
-  safepower_agent_proto::SystemState initialState;
+  safepower_agent_proto::SystemState initialState = InitialSystemState();
   auto reactor =
       std::make_shared<StateUpdater<safepower_agent_proto::SystemState>>(
           std::move(initialState), true);
diff --git a/google3/production/borg_mgmt/node_proxy/safepower/safepower_agent/copy.bara.sky b/google3/production/borg_mgmt/node_proxy/safepower/safepower_agent/copy.bara.sky
index f4da4e0..873ec39 100644
--- a/google3/production/borg_mgmt/node_proxy/safepower/safepower_agent/copy.bara.sky
+++ b/google3/production/borg_mgmt/node_proxy/safepower/safepower_agent/copy.bara.sky
@@ -33,6 +33,8 @@
     "action_context.cc",
     "action_context.h",
     "action_hash.h",
+    "action_validation.cc",
+    "action_validation.h",
     "bmc/address_lookup.cc",
     "bmc/address_lookup.h",
     "bmc/auth.cc",
diff --git a/meson.build b/meson.build
index 030be19..b5b15de 100644
--- a/meson.build
+++ b/meson.build
@@ -160,12 +160,25 @@
   link_with: [libconvert_proto],
 )
 
+libaction_validation = static_library(
+  'action_validation',
+  'action_validation.cc',
+  include_directories: safepower_include,
+  implicit_include_directories: false,
+  dependencies: [safepower_deps],
+)
+action_validation = declare_dependency(
+  dependencies: safepower_deps,
+  include_directories: safepower_include,
+  link_with: [libaction_validation],
+)
+
 libaction_context = static_library(
   'action_context',
   'action_context.cc',
   include_directories: safepower_include,
   implicit_include_directories: false,
-  dependencies: [safepower_deps, convert_proto],
+  dependencies: [safepower_deps, convert_proto, action_validation],
 )
 action_context = declare_dependency(
   dependencies: safepower_deps,
@@ -228,6 +241,7 @@
   auth_bmc,
   grpcpp_auth,
   action_context,
+  action_validation,
   persistent_storage,
   daemon_context,
   daemon_context_bmc,
@@ -288,6 +302,7 @@
                        state_merge,
                        register_actions_bmc,
                        action_context,
+                       action_validation,
                        condition,
                        parse_text_proto,
                        static_state,
diff --git a/proto/safepower_agent_config.proto b/proto/safepower_agent_config.proto
index a91c92f..9dcaef5 100644
--- a/proto/safepower_agent_config.proto
+++ b/proto/safepower_agent_config.proto
@@ -17,6 +17,9 @@
 message ConditionValidationOptions {
   int32 max_boots = 1 [default = 4];
   int32 max_timeout_seconds = 2 [default = 7200];
+  // Interval at which validation checks on saved actions are retried after a
+  // transient error.
+  int64 validation_retry_interval_ms = 3 [default = 1000];
 }
 
 // gpowerd specific configs will go here