| #include "bmc/remote_state_ping.h" |
| |
| #include <iterator> |
| #include <memory> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "bmc/daemon_context_bmc.h" |
| #include "daemon_context.h" |
| #include "safepower_agent.pb.h" |
| #include "safepower_agent_config.pb.h" |
| #include "state_updater.h" |
| #include "one/network_interfaces.pb.h" |
| #include "one/offline_node_entities.pb.h" |
| #include "one/resolved_entities.pb.h" |
| #include "absl/functional/bind_front.h" |
| #include "absl/log/check.h" |
| #include "absl/log/log.h" |
| #include "absl/status/status.h" |
| #include "absl/strings/str_cat.h" |
| #include "absl/strings/str_join.h" |
| #include "absl/synchronization/mutex.h" |
| #include "absl/time/time.h" |
| // NOLINTBEGIN(readability/boost) // Runs on BMC. |
| #include "boost/process.hpp" |
| #include "boost/process/args.hpp" |
| #include "boost/system.hpp" |
| // NOLINTEND(readability/boost) |
| |
| namespace safepower_agent { |
| |
| PingStateMonitor::PingStateMonitor( |
| std::string entity_tag, std::string hostname, |
| safepower_agent_config::PingConfig ping_config, |
| std::shared_ptr<StateUpdater<safepower_agent_proto::SystemState>> |
| system_state_updater) |
| : entity_tag_(std::move(entity_tag)), |
| hostname_(std::move(hostname)), |
| command_({std::move(*ping_config.mutable_command())}), |
| interval_(absl::Milliseconds(ping_config.interval_ms())), |
| system_state_updater_(std::move(system_state_updater)), |
| args_(std::make_move_iterator(ping_config.mutable_extra_args()->begin()), |
| std::make_move_iterator(ping_config.mutable_extra_args()->end())) { |
| args_.push_back("-c"); |
| args_.push_back(absl::StrCat(ping_config.count_per_check())); |
| args_.push_back(hostname_); |
| LOG(INFO) << "Ping command: " << command_ << " " << absl::StrJoin(args_, " "); |
| } |
| |
| PingStateMonitor::PingStateMonitor(PingStateMonitor&& other) |
| : entity_tag_(std::move(other.entity_tag_)), |
| hostname_(std::move(other.hostname_)), |
| command_(std::move(other.command_)), |
| interval_(std::move(other.interval_)), |
| system_state_updater_(std::move(other.system_state_updater_)), |
| args_(std::move(other.args_)), |
| reachable_state_(other.reachable_state_) { |
| CHECK(!other.Started()) << "PingStateMonitor moved while started"; |
| } |
| |
| PingStateMonitor& PingStateMonitor::operator=(PingStateMonitor&& other) { |
| if (Started()) { |
| LOG(DFATAL) << "PingStateMonitor replaced while started"; |
| return *this; |
| } |
| |
| if (other.Started()) { |
| LOG(DFATAL) << "PingStateMonitor moved while started"; |
| return *this; |
| } |
| |
| entity_tag_ = std::move(other.entity_tag_); |
| hostname_ = std::move(other.hostname_); |
| command_ = std::move(other.command_); |
| interval_ = std::move(other.interval_); |
| system_state_updater_ = std::move(other.system_state_updater_); |
| args_ = std::move(other.args_); |
| reachable_state_ = other.reachable_state_; |
| return *this; |
| } |
| |
| bool PingStateMonitor::Started() const { |
| absl::MutexLock lock(task_mutex_); |
| return !task_name_.empty() || activate_callback_handle_.pending() || |
| idle_callback_handle_.pending(); |
| } |
| |
| void PingStateMonitor::StopTimer() { |
| if (task_name_.empty()) { |
| return; |
| } |
| LOG(INFO) << "Stopping ping state monitor for " << entity_tag_; |
| absl::Status status = DaemonContext::Get().scheduler().CancelCall(task_name_); |
| if (!status.ok()) { |
| LOG(WARNING) << "Failed to cancel ping state monitor: " << status; |
| } |
| task_name_.clear(); |
| } |
| |
| void PingStateMonitor::StartTimer(absl::Duration interval) { |
| LOG(INFO) << "Starting ping state monitor for " << entity_tag_ |
| << " with interval " << interval; |
| std::string task_name = absl::StrCat("ping_state_monitor_", entity_tag_); |
| absl::Status status = DaemonContext::Get().scheduler().PeriodicCall( |
| absl::bind_front(&PingStateMonitor::Ping, this), interval, task_name); |
| if (!status.ok()) { |
| LOG(DFATAL) << "Failed to start ping state monitor: " << status; |
| return; |
| } |
| task_name_ = std::move(task_name); |
| } |
| |
| void PingStateMonitor::Pause() { |
| { |
| absl::MutexLock lock(task_mutex_); |
| if (task_name_.empty()) { |
| LOG(DFATAL) << "PingStateMonitor paused while task not started"; |
| } |
| StopTimer(); |
| // When the state updater is paused, we don't know the current state of the |
| // node. e.g. it is possible that the node goes down or comes back up while |
| // the updater is paused. When a new listener is added, setting an unknown |
| // state ensures that we never report a stale state. Since state comparisons |
| // are usually equality checks, this should not break any assumptions. |
| UpdateReachableState(ReachableState::kUnknown); |
| } |
| activate_callback_handle_ = system_state_updater_->OnActive( |
| absl::bind_front(&PingStateMonitor::Resume, this)); |
| } |
| |
| void PingStateMonitor::Resume() { |
| { |
| absl::MutexLock lock(task_mutex_); |
| if (!task_name_.empty()) { |
| LOG(DFATAL) << "PingStateMonitor resumed while task started"; |
| return; |
| } |
| StartTimer(interval_); |
| } |
| idle_callback_handle_ = system_state_updater_->OnIdle( |
| absl::bind_front(&PingStateMonitor::Pause, this)); |
| } |
| |
| absl::Status PingStateMonitor::Start() { |
| // Will start immediately if the state updater is idle. |
| activate_callback_handle_ = system_state_updater_->OnActive( |
| absl::bind_front(&PingStateMonitor::Resume, this)); |
| return absl::OkStatus(); |
| } |
| |
| PingStateMonitor::~PingStateMonitor() { |
| idle_callback_handle_.TryCancel(); |
| activate_callback_handle_.TryCancel(); |
| if (!task_mutex_.try_lock()) { |
| LOG(DFATAL) << "PingStateMonitor destroyed while mutex was held"; |
| task_mutex_.lock(); |
| } |
| StopTimer(); |
| task_mutex_.unlock(); |
| } |
| |
| // TODO(jebr): b/450140775 - occasionally log the ping command output |
| void PingStateMonitor::Ping() { |
| boost::process::async_system( |
| DaemonContextBMC::Get().get_io_context(), |
| absl::bind_front(&PingStateMonitor::ProcessPingResult, this), |
| boost::process::exe = command_, boost::process::args = args_, |
| (boost::process::std_out & boost::process::std_err) > |
| boost::process::null); |
| } |
| |
| void PingStateMonitor::ProcessPingResult(boost::system::error_code ec, |
| int exit_code) { |
| if (ec) { |
| LOG(ERROR) << "Failed to ping " << hostname_ << ": " << ec.message(); |
| return; |
| } |
| ReachableState new_reachable_state = exit_code == 0 |
| ? ReachableState::kReachable |
| : ReachableState::kUnreachable; |
| UpdateReachableState(new_reachable_state); |
| } |
| |
| void PingStateMonitor::UpdateReachableState( |
| ReachableState new_reachable_state) { |
| if (new_reachable_state == reachable_state_) { |
| return; |
| } |
| safepower_agent_proto::BootStateSpecifier state; |
| switch (new_reachable_state) { |
| case ReachableState::kUnknown: |
| LOG(INFO) << entity_tag_ << " is reachability state is unknown"; |
| state = safepower_agent_proto::BOOT_STATE_UNSPECIFIED; |
| break; |
| case ReachableState::kReachable: |
| LOG(INFO) << entity_tag_ << " is reachable"; |
| state = safepower_agent_proto::BOOT_STATE_BOOTING; |
| break; |
| case ReachableState::kUnreachable: |
| LOG(INFO) << entity_tag_ << " is unreachable"; |
| state = safepower_agent_proto::BOOT_STATE_NOT_BOOTED; |
| break; |
| default: |
| LOG(DFATAL) << "Unknown reachable state: " |
| << static_cast<int>(new_reachable_state); |
| return; |
| } |
| safepower_agent_proto::SystemState new_state; |
| (*new_state.mutable_node_state())[entity_tag_] |
| .mutable_boot_state() |
| ->set_state(state); |
| system_state_updater_->UpdateState(new_state); |
| reachable_state_ = new_reachable_state; |
| } |
| |
| } // namespace safepower_agent |