| // Copyright 2024 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "host_command.hpp" |
| |
| #include "google3/host_commands.h" |
| |
| #include "log_collector_util.hpp" |
| #include "message_intf.hpp" |
| #include "message_util.hpp" |
| #include "payload_update.hpp" |
| |
| #include <stdplus/print.hpp> |
| #include <stdplus/raw.hpp> |
| #include <xyz/openbmc_project/Common/error.hpp> |
| #include <xyz/openbmc_project/Control/Hoth/error.hpp> |
| |
| #include <chrono> |
| #include <fstream> |
| #include <future> |
| #include <span> |
| #include <sstream> |
| #include <thread> |
| #include <vector> |
| |
| namespace google |
| { |
| namespace hoth |
| { |
| namespace internal |
| { |
| |
using sdbusplus::error::xyz::openbmc_project::common::Timeout;
using sdbusplus::error::xyz::openbmc_project::control::hoth::CommandFailure;
using sdbusplus::error::xyz::openbmc_project::control::hoth::InterfaceError;
using sdbusplus::error::xyz::openbmc_project::control::hoth::ResponseFailure;
| |
| HostCommandImpl::HostCommandImpl(MessageIntf* msg, boost::asio::io_context* io, |
| LogCollectorUtil* logCollectorUtil, |
| const std::string& name, size_t max_retries, |
| bool allowLegacyVerify, |
| const uint32_t uartChannelId) |
| : msg(msg), timer(*io), io(io), logCollectorUtil(logCollectorUtil), |
| name(name), max_retries(max_retries), |
| allowLegacyVerify(allowLegacyVerify), uartChannelId(uartChannelId) |
| { |
    if (uartChannelId != 0)
    {
        fmt::println(stderr, "Triggering UART log collector for {}", name);
        collectUartLogsAsync();
    }
| } |
| |
| void HostCommandImpl::validateAndSend(const std::vector<uint8_t>& req) |
| { |
| auto actual_req = std::span<const uint8_t>(std::data(req), reqLen(req)); |
| |
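    // A well-formed request checksums to zero over the header and payload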
| if (calculateChecksum(actual_req) != 0) |
| { |
| stdplus::print(stderr, "Checksum validation failed for host command\n"); |
| throw CommandFailure(); |
| } |
| try |
| { |
| msg->send(actual_req.data(), actual_req.size(), 0); |
| } |
| catch (const std::exception& e) |
| { |
| stdplus::print(stderr, "Failed to send command to Hoth: {}\n", |
| e.what()); |
| throw InterfaceError(); |
| } |
| } |
| |
| void HostCommandImpl::recvAndValidate(std::vector<uint8_t>& rsp) |
| { |
| rsp.resize(sizeof(RspHeader)); |
| |
| try |
| { |
| msg->recv(rsp.data(), rsp.size(), 0); |
| } |
| catch (const std::exception& e) |
| { |
        stdplus::print(stderr, "Failed to get response from Hoth: {}\n",
                       e.what());
| throw InterfaceError(); |
| } |
| |
| // Make room for the data blob |
| rsp.resize(rspLen(rsp), 0); |
| try |
| { |
| msg->recv(rsp.data() + sizeof(RspHeader), |
| rsp.size() - sizeof(RspHeader), sizeof(RspHeader)); |
| } |
| catch (const std::exception& e) |
| { |
| stdplus::print(stderr, "Failed to get response from Hoth: {}\n", |
| e.what()); |
| throw InterfaceError(); |
| } |
| |
| if (calculateChecksum(rsp) != 0) |
| { |
| stdplus::print(stderr, |
| "Checksum validation failed for Hoth response\n"); |
| throw ResponseFailure(); |
| } |
| } |
| |
| std::vector<uint8_t> |
| HostCommandImpl::sendCommandNoLock(const std::vector<uint8_t>& command) |
| { |
| auto longRunning = isCommandLongRunning(command, allowLegacyVerify); |
| if (longRunning != EC_RES_SUCCESS) |
| { |
        stdplus::println(stderr, "Long-running command blocked in hothd. "
                                 "Triggering Hoth log collection...");
| collectHothLogsAsync(true); // dump hoth logs |
| return generateErrorResponse(longRunning); |
| } |
| |
| std::vector<uint8_t> response; |
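    // Retry the send/recv exchange with exponential backoff. A CommandFailure
    // (bad request checksum) is never retried; other failures retry up to
    // max_retries.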
| for (size_t retries = 0;; ++retries) |
| { |
| try |
| { |
| validateAndSend(command); |
| recvAndValidate(response); |
| communicationFailed = false; |
| return response; |
| } |
| catch (const CommandFailure& e) |
| { |
            stdplus::println(
                stderr,
                "Command failure in hothd. Triggering Hoth log collection...");
| collectHothLogsAsync(true); // dump hoth logs |
| throw; |
| } |
| catch (...) |
| { |
| if (communicationFailed || retries >= max_retries) |
| { |
| communicationFailed = true; |
                stdplus::println(
                    stderr, "Communication failed and all retries exhausted in "
                            "hothd. Triggering Hoth log collection...");
| collectHothLogsAsync(true); // dump hoth logs |
| throw; |
| } |
            // Exponentially back off as retries fail: sleep (2^retries - 1)
            // deciseconds, i.e. 0s, 0.1s, 0.3s, 0.7s, ...
| std::this_thread::sleep_for( |
| std::chrono::duration<size_t, std::deci>((1 << retries) - 1)); |
| stdplus::print(stderr, "Retrying\n"); |
| } |
| } |
| } |
| |
| std::vector<uint8_t> |
| HostCommandImpl::sendCommand(const std::vector<uint8_t>& command) |
| { |
| std::unique_lock l(cmdLock); |
| return sendCommandNoLock(command); |
| } |
| |
| std::vector<uint8_t> |
| HostCommandImpl::sendCommand(const std::vector<uint8_t>& command, |
| std::chrono::milliseconds timeout) |
| { |
| std::unique_lock l(cmdLock, timeout); |
| if (!l) |
| { |
        stdplus::print(stderr, "Command timed out acquiring lock\n");
| throw Timeout(); |
| } |
| return sendCommandNoLock(command); |
| } |
| |
| std::vector<uint8_t> HostCommandImpl::sendCommand(uint16_t command, |
| uint8_t commandVersion, |
| const void* request, |
| size_t requestSize) |
| { |
| struct ReqHeader requestHeader; |
| populateReqHeader(command, commandVersion, request, requestSize, |
| &requestHeader); |
| |
| // Concatenate the request header and the request into one uint8_t vector to |
| // be sent to Hoth |
| auto* const requestHeaderPtr = reinterpret_cast<uint8_t*>(&requestHeader); |
| const auto* const requestPtr = static_cast<const uint8_t*>(request); |
| std::vector<uint8_t> populatedCommand( |
| requestHeaderPtr, requestHeaderPtr + sizeof(requestHeader)); |
| populatedCommand.insert(populatedCommand.end(), requestPtr, |
| requestPtr + requestSize); |
| |
| return sendCommand(populatedCommand); |
| } |
| |
| std::vector<uint8_t> |
| HostCommandImpl::sendCommand(uint16_t command, uint8_t commandVersion, |
| const void* request, size_t requestSize, |
| std::chrono::milliseconds timeout) |
| { |
| struct ReqHeader requestHeader; |
| populateReqHeader(command, commandVersion, request, requestSize, |
| &requestHeader); |
| |
| // Concatenate the request header and the request into one uint8_t vector to |
| // be sent to Hoth |
| auto* const requestHeaderPtr = reinterpret_cast<uint8_t*>(&requestHeader); |
| const auto* const requestPtr = static_cast<const uint8_t*>(request); |
| std::vector<uint8_t> populatedCommand( |
| requestHeaderPtr, requestHeaderPtr + sizeof(requestHeader)); |
| populatedCommand.insert(populatedCommand.end(), requestPtr, |
| requestPtr + requestSize); |
| |
| return sendCommand(populatedCommand, timeout); |
| } |
| |
uint64_t HostCommandImpl::sendCommandAsync(
    [[maybe_unused]] const std::vector<uint8_t>& command)
{
    // Not yet implemented
    return 0;
}
| |
std::vector<uint8_t>
    HostCommandImpl::getResponse([[maybe_unused]] uint64_t callToken)
{
    // Not yet implemented
    return {};
}
| |
| bool HostCommandImpl::communicationFailure() const |
| { |
| return communicationFailed; |
| } |
| |
| uint8_t HostCommandImpl::isCommandLongRunning(std::span<const uint8_t> cmd, |
| bool allowLegacyVerify) |
| { |
| try |
| { |
| auto& header = stdplus::raw::extractRef<ReqHeader>(cmd); |
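        // extractRef consumes the header bytes from the span, so the
        // extractRef below sees the payload that follows the header.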
| if (header.struct_version != SUPPORTED_STRUCT_VERSION || |
| header.command != |
| EC_CMD_BOARD_SPECIFIC_BASE + EC_PRV_CMD_HOTH_PAYLOAD_UPDATE) |
| { |
| return EC_RES_SUCCESS; |
| } |
| auto& req = stdplus::raw::extractRef<payload_update_packet>(cmd); |
        // INITIATE takes a very long time (2+ min) even for small (32MB)
        // flash. FINALIZE takes as long as VERIFY, but isn't even used in
        // production tooling, so we can block it unconditionally. VERIFY is
        // typically ~16s for 32MB flash and ~30s for 64MB flash.
| if (req.type == PAYLOAD_UPDATE_INITIATE || |
| req.type == PAYLOAD_UPDATE_FINALIZE) |
| { |
| return EC_RES_ACCESS_DENIED; |
| } |
| // The time for ERASE scales with the length of the erase. We want to |
| // limit this to a well known maximum duration. |
| if (req.type == PAYLOAD_UPDATE_ERASE && req.len > kMaxErase) |
| { |
| return EC_RES_ACCESS_DENIED; |
| } |
        // VERIFY_CHUNK implementations are known to be buggy and will not be
        // fixed; we don't want machines to get broken.
| if (req.type == PAYLOAD_UPDATE_VERIFY_CHUNK) |
| { |
            // This should return ACCESS_DENIED, but can't currently since our
            // tool implementations only check for INVALID_PARAM. Once we are
            // sure those tools are phased out, we can switch it.
| return EC_RES_INVALID_PARAM; |
| } |
| // Newer systems can't allow legacy VERIFY commands as their execution |
| // time scales with the size of the flash. Older systems with smallish |
| // flash can allow this for compatibility. |
| if (allowLegacyVerify) |
| { |
| return EC_RES_SUCCESS; |
| } |
| if (req.type == PAYLOAD_UPDATE_VERIFY) |
| { |
| return EC_RES_ACCESS_DENIED; |
| } |
| } |
    catch (...)
    {
        // Commands too short or malformed to parse are not long-running;
        // pass them through and let Hoth reject them.
    }
| |
| return EC_RES_SUCCESS; |
| } |
| |
| void HostCommandImpl::collectHothLogs() |
| { |
    std::vector<uint8_t> snapshotBufferRequest =
        LogCollectorUtil::generateSnapshotRequest();
| std::vector<uint8_t> snapshotResponse = sendCommand(snapshotBufferRequest); |
    if (!LogCollectorUtil::isResponseValid(snapshotResponse))
    {
        fmt::println(stderr,
                     "Response invalid: unable to snapshot hoth buffer!");
| return; |
| } |
| |
| fmt::println(stderr, "=============== Hoth Logs Begin ==============="); |
| |
    // Read the entire buffer consecutively; the +1 covers a final partial
    // chunk when kHothBufferSize is not a multiple of kTransactionBufferSize
| for (int extractionCounter = 0; |
| extractionCounter < ((kHothBufferSize / kTransactionBufferSize) + 1); |
| extractionCounter++) |
| { |
        std::vector<uint8_t> grabSnapshotRequest =
            LogCollectorUtil::generateGrabSnapshotRequest();
| |
| std::vector<uint8_t> grabSnapshotResponse = |
| sendCommand(grabSnapshotRequest); |
        if (!LogCollectorUtil::isResponseValid(grabSnapshotResponse))
        {
            fmt::println(stderr,
                         "Response invalid: unable to grab hoth snapshot!");
| break; |
| } |
| grabSnapshotResponse.erase(grabSnapshotResponse.begin(), |
| grabSnapshotResponse.begin() + |
| sizeof(RspHeader)); |
        LogCollectorUtil::publishSnapshot(grabSnapshotResponse,
                                          PublishType::Stderr);
| |
| if (grabSnapshotResponse.size() < |
| (kTransactionBufferSize - sizeof(struct RspHeader))) |
| { |
            // A short read means the snapshot buffer has been fully drained
| if constexpr (kDebug) |
| { |
| fmt::println(stderr, |
| "Response size: {} while transactionBufferSize: " |
| "{}. All bytes read", |
| grabSnapshotResponse.size(), |
| kTransactionBufferSize); |
| } |
| break; |
| } |
| } |
    fmt::println(stderr, "=============== Hoth Logs End ===============");
| } |
| |
| void HostCommandImpl::setPromiseValue(int requestId) |
| { |
| if constexpr (kDebug) |
| { |
| fmt::println(stderr, "Set promise value for {}", requestId); |
| } |
| auto it = idToLogCollectorPromiseMap.find(requestId); |
| if (it != idToLogCollectorPromiseMap.end()) |
| { |
| try |
| { |
| it->second.set_value(); |
| } |
| catch (const std::exception& e) |
| { |
| fmt::println(stderr, |
| "Promise already set, exception {} for request id: {}", |
| e.what(), requestId); |
| } |
| } |
| else |
| { |
| fmt::println(stderr, |
| "No promise object found for requestId: {} for setting " |
| "promise value", |
| requestId); |
| } |
| } |
| |
| void HostCommandImpl::setPromiseException(int requestId) |
| { |
| if constexpr (kDebug) |
| { |
| fmt::println(stderr, "Set promise exception for {}", requestId); |
| } |
| auto it = idToLogCollectorPromiseMap.find(requestId); |
| if (it != idToLogCollectorPromiseMap.end()) |
| { |
| try |
| { |
| it->second.set_exception(std::current_exception()); |
| } |
| catch (const std::exception& e) |
| { |
| fmt::println(stderr, |
| "Promise already set, exception {} for request id: {}", |
| e.what(), requestId); |
| } |
| } |
| else |
| { |
| fmt::println(stderr, |
| "No promise object found for requestId: {} for setting " |
| "promise exception", |
| requestId); |
| } |
| } |
| |
| void HostCommandImpl::waitOnLogCollectorPromise(int requestId) |
| { |
| auto it = idToLogCollectorPromiseMap.find(requestId); |
| if (it != idToLogCollectorPromiseMap.end()) |
| { |
| fmt::println(stderr, "Waiting on requestId: {}", requestId); |
| it->second.get_future().wait(); |
| fmt::println(stderr, "Promise completed for requestId: {}", requestId); |
| } |
| else |
| { |
        fmt::println(
            stderr,
            "No promise object found for requestId: {} to wait on",
            requestId);
| } |
| cleanupLogCollectorRequest(requestId); |
| } |
| |
| void HostCommandImpl::cleanupLogCollectorRequest(int requestId) |
| { |
| auto it = idToLogCollectorPromiseMap.find(requestId); |
| if (it != idToLogCollectorPromiseMap.end()) |
| { |
| if constexpr (kDebug) |
| { |
| fmt::println(stderr, "Cleaning up request: {}", requestId); |
| } |
| idToLogCollectorPromiseMap.erase(requestId); |
| } |
| } |
| |
| int HostCommandImpl::collectHothLogsAsync(bool cleanupPromiseAfterExecution) |
| { |
| std::promise<void> promise; |
| int requestId = logCollectorUtil->generateRequestId(); |
| idToLogCollectorPromiseMap.emplace(requestId, std::move(promise)); |
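    // Register the promise before the rate-limit check so a waiter on this
    // requestId always finds an entry; the discard path below completes it
    // immediately.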
| if (logCollectorUtil->rateLimiter.allowedByLimiter()) |
| { |
| fmt::println(stderr, "Collecting hoth logs for request id {}...", |
| requestId); |
| |
| io->post([this, requestId, cleanupPromiseAfterExecution]() mutable { |
| try |
| { |
| this->collectHothLogs(); |
| setPromiseValue(requestId); |
| } |
| catch (const std::exception& e) |
| { |
| fmt::println(stderr, "Exception while collecting logs: {}", |
| e.what()); |
| setPromiseException(requestId); |
| } |
| if (cleanupPromiseAfterExecution) |
| { |
| cleanupLogCollectorRequest(requestId); |
| } |
| }); |
| } |
| else |
| { |
| fmt::println(stderr, |
| "Collect Hoth Log request discarded due to rate limiter " |
| "for request id {}", |
| requestId); |
| setPromiseValue(requestId); |
| if (cleanupPromiseAfterExecution) |
| { |
| cleanupLogCollectorRequest(requestId); |
| } |
| } |
| return requestId; |
| } |
| |
| std::optional<little_uint32_t> HostCommandImpl::getChannelWriteOffset() |
| { |
| if constexpr (kDebug) |
| { |
| fmt::println(stderr, "Getting initial offset with status command..."); |
| } |
    std::vector<uint8_t> offsetRequest =
        LogCollectorUtil::generateGetChannelWriteOffsetRequest(uartChannelId);
| std::vector<uint8_t> offsetResponse; |
| try |
| { |
| offsetResponse = |
| sendCommand(offsetRequest, std::chrono::milliseconds(500)); |
| } |
| catch (const std::exception& e) |
| { |
| fmt::println(stderr, |
| "Failure at sendHostCommand for getting write offset for " |
| "channelId: {}, with error: {}", |
| uartChannelId, e.what()); |
| return std::nullopt; |
| } |
| |
    if (!LogCollectorUtil::isResponseValid(offsetResponse))
    {
        fmt::println(stderr,
                     "Response invalid: unable to grab write offset for UART "
                     "logs, channelId: {}",
                     uartChannelId);
| return std::nullopt; |
| } |
| |
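    // The channel status structure sits immediately after the response
    // header; view it in place rather than copying.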
| struct ec_channel_status_response* channelWriteOffsetResp = |
| reinterpret_cast<struct ec_channel_status_response*>( |
| &offsetResponse[sizeof(struct RspHeader)]); |
| little_uint32_t writeOffset = channelWriteOffsetResp->write_offset; |
| return writeOffset; |
| } |
| |
| uint32_t HostCommandImpl::collectUartLogs(uint32_t offset) |
| { |
| if constexpr (kDebug) |
| { |
        std::string heartbeatData = fmt::format(
            "Collecting UART logs for {} with offset {}", name, offset);
        LogCollectorUtil::heartbeat(heartbeatData);
| } |
| |
    std::vector<uint8_t> uartLogsRequest =
        LogCollectorUtil::generateCollectUartLogsRequest(uartChannelId,
                                                         offset);
| |
| std::vector<uint8_t> readResponse = |
| sendCommand(uartLogsRequest, std::chrono::milliseconds(500)); |
| |
| std::span<const uint8_t> output = readResponse; |
| auto& rsp = stdplus::raw::extractRef<RspHeader>(output); |
| |
| if (rsp.result != EC_RES_SUCCESS) |
| { |
| fmt::println(stderr, "Response invalid: {}", |
| static_cast<uint8_t>(rsp.result)); |
| throw ResponseFailure(); |
| } |
| |
| const auto& readResponseBody = |
| stdplus::raw::copyFrom<ec_channel_read_response>(output); |
| std::string meta; |
| if (offset != readResponseBody.offset) |
| { |
| meta = fmt::format( |
| "Missed some logs (Reason: High Verbosity). Offset sent: {}, " |
| "Offset received: {}", |
| offset, static_cast<uint32_t>(readResponseBody.offset)); |
| } |
| uint32_t nextReadOffset = |
| readResponseBody.offset + |
| (output.size() - sizeof(ec_channel_read_response)); |
| |
| if constexpr (kDebug) |
| { |
| fmt::println(stderr, "Current response length: {}, Next offset: {}", |
| readResponse.size(), nextReadOffset); |
| } |
| |
| // Remove the header and return the response |
| readResponse.erase(readResponse.begin(), |
| readResponse.begin() + sizeof(RspHeader) + |
| sizeof(ec_channel_read_response)); |
| |
    LogCollectorUtil::publishSnapshot(readResponse, PublishType::File, name,
                                      meta);
| return nextReadOffset; |
| } |
| |
| void HostCommandImpl::collectUartLogsAsync() |
| { |
| if (runUartCollector) |
| { |
        fmt::println(
            stderr,
            "UART log scheduler already running. Discarding trigger request");
| return; |
| } |
| |
| runUartCollector = true; |
| std::optional<little_uint32_t> offset = |
| getChannelWriteOffset(); // one synchronous operation |
| if (!offset.has_value()) |
| { |
| fmt::println(stderr, "Unable to trigger UART log collector due to " |
| "initial offset retrieval failure"); |
| runUartCollector = false; |
| scheduleUartLogCollector(); // call scheduler for retry logic |
| } |
    // Compute a read offset that is guaranteed to be outside of the Hoth
    // buffer by backing off from the write offset, since there is no way to
    // get the read offset directly.
| else |
| { |
| uint32_t readOffset = |
| static_cast<uint32_t>(offset.value()) - kReadOffsetTweak; |
| fmt::println(stderr, |
| "Triggering scheduler with initial offset {} for name: {}", |
| readOffset, name); |
| scheduleUartLogCollector(readOffset); |
| } |
| } |
| |
| void HostCommandImpl::scheduleUartLogCollector(little_uint32_t offset) |
| { |
| timer.expires_from_now( |
| std::chrono::seconds(logCollectorUtil->getAsyncWaitTime())); |
| timer.async_wait([this, offset](const boost::system::error_code& error) { |
| if (!error) |
| { |
| if (runUartCollector) |
| { |
| uint32_t resultOffset; |
| try |
| { |
| resultOffset = collectUartLogs(offset); |
| } |
| catch (const std::exception& e) |
| { |
                    fmt::println(
                        stderr,
                        "Exception occurred while collecting logs: {}",
                        e.what());
| resultOffset = offset; |
| } |
| scheduleUartLogCollector(resultOffset); |
| } |
| else |
| { |
                fmt::println(
                    stderr,
                    "UART log collector is not running. Retries left: {}",
                    kMaxLogCollectorRetries - logCollectorRetryCounter);
| if (logCollectorRetryCounter < kMaxLogCollectorRetries) |
| { |
| // Retry to trigger the log collector again with the async |
| // wait time |
| fmt::println(stderr, "Triggering collector again..."); |
| logCollectorRetryCounter++; |
| collectUartLogsAsync(); |
| } |
| else |
| { |
                    fmt::println(stderr, "Stopping UART log collector. Max "
                                         "retries exhausted");
| runUartCollector = false; |
| } |
| } |
| } |
| else |
| { |
            fmt::println(stderr,
                         "Failing UART log collector due to async wait "
                         "response handler failure {} value {}",
                         error.message(), error.value());
| } |
| }); |
| } |
| |
| void HostCommandImpl::stopUartLogs() |
| { |
| runUartCollector = false; |
| } |
| |
| } // namespace internal |
| |
| } // namespace hoth |
| |
| } // namespace google |