// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "host_command.hpp"
#include "google3/host_commands.h"
#include "log_collector_util.hpp"
#include "message_intf.hpp"
#include "message_util.hpp"
#include "payload_update.hpp"
#include <stdplus/print.hpp>
#include <stdplus/raw.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
#include <xyz/openbmc_project/Control/Hoth/error.hpp>
#include <chrono>
#include <fstream>
#include <future>
#include <span>
#include <sstream>
#include <thread>
#include <vector>
namespace google
{
namespace hoth
{
namespace internal
{
using sdbusplus::error::xyz::openbmc_project::control::hoth::CommandFailure;
using sdbusplus::error::xyz::openbmc_project::control::hoth::InterfaceError;
using sdbusplus::error::xyz::openbmc_project::control::hoth::ResponseFailure;
using sdbusplus::error::xyz::openbmc_project::common::Timeout;
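// A non-zero uartChannelId enables UART log collection for this Hoth
// instance as soon as the object is constructed.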
HostCommandImpl::HostCommandImpl(MessageIntf* msg, boost::asio::io_context* io,
LogCollectorUtil* logCollectorUtil,
const std::string& name, size_t max_retries,
bool allowLegacyVerify,
const uint32_t uartChannelId)
: msg(msg), timer(*io), io(io), logCollectorUtil(logCollectorUtil),
name(name), max_retries(max_retries),
allowLegacyVerify(allowLegacyVerify), uartChannelId(uartChannelId)
{
    if (uartChannelId != 0)
    {
        fmt::println(stderr, "Triggering UART log collector for {}", name);
        collectUartLogsAsync();
    }
}
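// Validates the checksum over the framed request (header + payload) and
// forwards it to Hoth. Throws CommandFailure on a checksum mismatch and
// InterfaceError when the underlying transport send fails.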
void HostCommandImpl::validateAndSend(const std::vector<uint8_t>& req)
{
auto actual_req = std::span<const uint8_t>(std::data(req), reqLen(req));
if (calculateChecksum(actual_req) != 0)
{
stdplus::print(stderr, "Checksum validation failed for host command\n");
throw CommandFailure();
}
try
{
msg->send(actual_req.data(), actual_req.size(), 0);
}
catch (const std::exception& e)
{
stdplus::print(stderr, "Failed to send command to Hoth: {}\n",
e.what());
throw InterfaceError();
}
}
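// Receives a response in two steps: the fixed-size RspHeader first to learn
// the total length, then the remaining data blob. Transport receive failures
// raise InterfaceError; a checksum mismatch over the whole message raises
// ResponseFailure.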
void HostCommandImpl::recvAndValidate(std::vector<uint8_t>& rsp)
{
rsp.resize(sizeof(RspHeader));
try
{
msg->recv(rsp.data(), rsp.size(), 0);
}
catch (const std::exception& e)
{
stdplus::print("Failed to get response from Hoth: {}\n", e.what());
throw InterfaceError();
}
// Make room for the data blob
rsp.resize(rspLen(rsp), 0);
try
{
msg->recv(rsp.data() + sizeof(RspHeader),
rsp.size() - sizeof(RspHeader), sizeof(RspHeader));
}
catch (const std::exception& e)
{
stdplus::print(stderr, "Failed to get response from Hoth: {}\n",
e.what());
throw InterfaceError();
}
if (calculateChecksum(rsp) != 0)
{
stdplus::print(stderr,
"Checksum validation failed for Hoth response\n");
throw ResponseFailure();
}
}
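// Sends a command without taking cmdLock. Long-running payload-update
// commands are rejected up front with a synthesized error response. A
// CommandFailure (bad request checksum) is never retried; interface and
// response failures are retried with exponential backoff, sleeping
// (2^retries - 1) deciseconds after each failed attempt. Once communication
// has been marked as failed or retries are exhausted, Hoth logs are collected
// asynchronously and the error is rethrown.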
std::vector<uint8_t>
HostCommandImpl::sendCommandNoLock(const std::vector<uint8_t>& command)
{
auto longRunning = isCommandLongRunning(command, allowLegacyVerify);
if (longRunning != EC_RES_SUCCESS)
{
stdplus::println(stderr, "Long running command failed in hothd. "
"Triggering Hoth log collection...");
collectHothLogsAsync(true); // dump hoth logs
return generateErrorResponse(longRunning);
}
std::vector<uint8_t> response;
for (size_t retries = 0;; ++retries)
{
try
{
validateAndSend(command);
recvAndValidate(response);
communicationFailed = false;
return response;
}
catch (const CommandFailure& e)
{
stdplus::println(
stderr,
"Command failure in hothd. Triggering Hoth Log collector...");
collectHothLogsAsync(true); // dump hoth logs
throw;
}
catch (...)
{
if (communicationFailed || retries >= max_retries)
{
communicationFailed = true;
stdplus::println(
stderr, "Communication failed and all retries exhausted in "
"hothd. Triggering Hoth Log Collector...");
collectHothLogsAsync(true); // dump hoth logs
throw;
}
// We want to exponentially back off the interface as retries fail
std::this_thread::sleep_for(
std::chrono::duration<size_t, std::deci>((1 << retries) - 1));
stdplus::print(stderr, "Retrying\n");
}
}
}
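// Serializes access to the Hoth interface: blocks until cmdLock is available
// so only one command is in flight at a time.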
std::vector<uint8_t>
HostCommandImpl::sendCommand(const std::vector<uint8_t>& command)
{
std::unique_lock l(cmdLock);
return sendCommandNoLock(command);
}
std::vector<uint8_t>
HostCommandImpl::sendCommand(const std::vector<uint8_t>& command,
std::chrono::milliseconds timeout)
{
std::unique_lock l(cmdLock, timeout);
if (!l)
{
stdplus::print(stderr, "Command timed out to acquiring lock\n");
throw Timeout();
}
return sendCommandNoLock(command);
}
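// Convenience overloads that frame a raw request: a ReqHeader is built for
// (command, commandVersion, request) and prepended to the request bytes
// before sending. Example usage (a sketch; SomeRequest and kSomeCommand are
// placeholder names, not identifiers from this codebase):
//   SomeRequest req{};
//   auto rsp = hostCmd.sendCommand(kSomeCommand, /*commandVersion=*/0, &req,
//                                  sizeof(req));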
std::vector<uint8_t> HostCommandImpl::sendCommand(uint16_t command,
uint8_t commandVersion,
const void* request,
size_t requestSize)
{
struct ReqHeader requestHeader;
populateReqHeader(command, commandVersion, request, requestSize,
&requestHeader);
// Concatenate the request header and the request into one uint8_t vector to
// be sent to Hoth
auto* const requestHeaderPtr = reinterpret_cast<uint8_t*>(&requestHeader);
const auto* const requestPtr = static_cast<const uint8_t*>(request);
std::vector<uint8_t> populatedCommand(
requestHeaderPtr, requestHeaderPtr + sizeof(requestHeader));
populatedCommand.insert(populatedCommand.end(), requestPtr,
requestPtr + requestSize);
return sendCommand(populatedCommand);
}
std::vector<uint8_t>
HostCommandImpl::sendCommand(uint16_t command, uint8_t commandVersion,
const void* request, size_t requestSize,
std::chrono::milliseconds timeout)
{
struct ReqHeader requestHeader;
populateReqHeader(command, commandVersion, request, requestSize,
&requestHeader);
// Concatenate the request header and the request into one uint8_t vector to
// be sent to Hoth
auto* const requestHeaderPtr = reinterpret_cast<uint8_t*>(&requestHeader);
const auto* const requestPtr = static_cast<const uint8_t*>(request);
std::vector<uint8_t> populatedCommand(
requestHeaderPtr, requestHeaderPtr + sizeof(requestHeader));
populatedCommand.insert(populatedCommand.end(), requestPtr,
requestPtr + requestSize);
return sendCommand(populatedCommand, timeout);
}
uint64_t HostCommandImpl::sendCommandAsync(
    [[maybe_unused]] const std::vector<uint8_t>& command)
{
    // Not yet implemented
    return 0;
}
std::vector<uint8_t>
    HostCommandImpl::getResponse([[maybe_unused]] uint64_t callToken)
{
    // Not yet implemented
    std::vector<uint8_t> result;
    return result;
}
bool HostCommandImpl::communicationFailure() const
{
return communicationFailed;
}
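// Returns EC_RES_SUCCESS if the command may proceed, or an EC error code if
// it is a payload-update request whose execution time would be too long for
// this interface. Requests too short to parse fall through to EC_RES_SUCCESS
// and are forwarded unchanged.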
uint8_t HostCommandImpl::isCommandLongRunning(std::span<const uint8_t> cmd,
bool allowLegacyVerify)
{
try
{
auto& header = stdplus::raw::extractRef<ReqHeader>(cmd);
if (header.struct_version != SUPPORTED_STRUCT_VERSION ||
header.command !=
EC_CMD_BOARD_SPECIFIC_BASE + EC_PRV_CMD_HOTH_PAYLOAD_UPDATE)
{
return EC_RES_SUCCESS;
}
auto& req = stdplus::raw::extractRef<payload_update_packet>(cmd);
        // INITIATE takes a very long time (2+ min) even for small (32MB)
        // flash. FINALIZE takes as long as VERIFY, but isn't even used in
        // production tooling, so we can block it unconditionally. This is
        // typically ~16s for 32MB flash and ~30s for 64MB flash.
if (req.type == PAYLOAD_UPDATE_INITIATE ||
req.type == PAYLOAD_UPDATE_FINALIZE)
{
return EC_RES_ACCESS_DENIED;
}
// The time for ERASE scales with the length of the erase. We want to
// limit this to a well known maximum duration.
if (req.type == PAYLOAD_UPDATE_ERASE && req.len > kMaxErase)
{
return EC_RES_ACCESS_DENIED;
}
        // VERIFY_CHUNK implementations are known to be buggy and will not be
        // fixed; we don't want machines to get broken.
if (req.type == PAYLOAD_UPDATE_VERIFY_CHUNK)
{
// This should return ACCESS_DENIED, but can't currently since our
// tool implementations only check for INVALID_PARAM. Once we are
            // sure the tools are phased out we can switch it.
return EC_RES_INVALID_PARAM;
}
// Newer systems can't allow legacy VERIFY commands as their execution
// time scales with the size of the flash. Older systems with smallish
// flash can allow this for compatibility.
if (allowLegacyVerify)
{
return EC_RES_SUCCESS;
}
if (req.type == PAYLOAD_UPDATE_VERIFY)
{
return EC_RES_ACCESS_DENIED;
}
}
catch (...)
{}
return EC_RES_SUCCESS;
}
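// Snapshots the Hoth log buffer and drains it with repeated grab-snapshot
// commands, publishing each chunk to stderr. The loop is bounded by the
// buffer size divided by the per-transaction size and stops early once a
// short chunk indicates the whole buffer has been read.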
void HostCommandImpl::collectHothLogs()
{
std::vector<uint8_t> snapshotBufferRequest =
google::hoth::internal::LogCollectorUtil::generateSnapshotRequest();
std::vector<uint8_t> snapshotResponse = sendCommand(snapshotBufferRequest);
if (!google::hoth::internal::LogCollectorUtil::isResponseValid(snapshotResponse))
{
        fmt::println(stderr,
                     "Response invalid - unable to snapshot the Hoth buffer!");
return;
}
fmt::println(stderr, "=============== Hoth Logs Begin ===============");
// Read the entire buffer consecutively
for (int extractionCounter = 0;
extractionCounter < ((kHothBufferSize / kTransactionBufferSize) + 1);
extractionCounter++)
{
std::vector<uint8_t> grabSnapshotRequest =
google::hoth::internal::LogCollectorUtil::generateGrabSnapshotRequest();
std::vector<uint8_t> grabSnapshotResponse =
sendCommand(grabSnapshotRequest);
if (!google::hoth::internal::LogCollectorUtil::isResponseValid(grabSnapshotResponse))
{
            fmt::println(stderr,
                         "Response invalid - unable to grab Hoth snapshot!");
break;
}
grabSnapshotResponse.erase(grabSnapshotResponse.begin(),
grabSnapshotResponse.begin() +
sizeof(RspHeader));
google::hoth::internal::LogCollectorUtil::publishSnapshot(grabSnapshotResponse,
PublishType::Stderr);
if (grabSnapshotResponse.size() <
(kTransactionBufferSize - sizeof(struct RspHeader)))
{
            // All bytes in the buffer have been read
if constexpr (kDebug)
{
fmt::println(stderr,
"Response size: {} while transactionBufferSize: "
"{}. All bytes read",
grabSnapshotResponse.size(),
kTransactionBufferSize);
}
break;
}
}
fmt::println(stderr, "=============== Hoth Logs Ends ===============");
}
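// Completes the promise registered for requestId, if any. Setting a promise
// that was already satisfied throws; the exception is caught and logged.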
void HostCommandImpl::setPromiseValue(int requestId)
{
if constexpr (kDebug)
{
fmt::println(stderr, "Set promise value for {}", requestId);
}
auto it = idToLogCollectorPromiseMap.find(requestId);
if (it != idToLogCollectorPromiseMap.end())
{
try
{
it->second.set_value();
}
catch (const std::exception& e)
{
fmt::println(stderr,
"Promise already set, exception {} for request id: {}",
e.what(), requestId);
}
}
else
{
fmt::println(stderr,
"No promise object found for requestId: {} for setting "
"promise value",
requestId);
}
}
void HostCommandImpl::setPromiseException(int requestId)
{
if constexpr (kDebug)
{
fmt::println(stderr, "Set promise exception for {}", requestId);
}
auto it = idToLogCollectorPromiseMap.find(requestId);
if (it != idToLogCollectorPromiseMap.end())
{
try
{
it->second.set_exception(std::current_exception());
}
catch (const std::exception& e)
{
fmt::println(stderr,
"Promise already set, exception {} for request id: {}",
e.what(), requestId);
}
}
else
{
fmt::println(stderr,
"No promise object found for requestId: {} for setting "
"promise exception",
requestId);
}
}
void HostCommandImpl::waitOnLogCollectorPromise(int requestId)
{
auto it = idToLogCollectorPromiseMap.find(requestId);
if (it != idToLogCollectorPromiseMap.end())
{
fmt::println(stderr, "Waiting on requestId: {}", requestId);
it->second.get_future().wait();
fmt::println(stderr, "Promise completed for requestId: {}", requestId);
}
else
{
fmt::println(
stderr,
"No promise object found for requestId: {} for cleaning request",
requestId);
}
cleanupLogCollectorRequest(requestId);
}
void HostCommandImpl::cleanupLogCollectorRequest(int requestId)
{
auto it = idToLogCollectorPromiseMap.find(requestId);
if (it != idToLogCollectorPromiseMap.end())
{
if constexpr (kDebug)
{
fmt::println(stderr, "Cleaning up request: {}", requestId);
}
idToLogCollectorPromiseMap.erase(requestId);
}
}
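// Schedules Hoth log collection on the io_context and returns a request id
// that callers can wait on via waitOnLogCollectorPromise(). Requests rejected
// by the rate limiter are completed immediately so waiters do not block. When
// cleanupPromiseAfterExecution is true, the promise is erased as soon as the
// collection attempt finishes.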
int HostCommandImpl::collectHothLogsAsync(bool cleanupPromiseAfterExecution)
{
std::promise<void> promise;
int requestId = logCollectorUtil->generateRequestId();
idToLogCollectorPromiseMap.emplace(requestId, std::move(promise));
if (logCollectorUtil->rateLimiter.allowedByLimiter())
{
fmt::println(stderr, "Collecting hoth logs for request id {}...",
requestId);
io->post([this, requestId, cleanupPromiseAfterExecution]() mutable {
try
{
this->collectHothLogs();
setPromiseValue(requestId);
}
catch (const std::exception& e)
{
fmt::println(stderr, "Exception while collecting logs: {}",
e.what());
setPromiseException(requestId);
}
if (cleanupPromiseAfterExecution)
{
cleanupLogCollectorRequest(requestId);
}
});
}
else
{
fmt::println(stderr,
"Collect Hoth Log request discarded due to rate limiter "
"for request id {}",
requestId);
setPromiseValue(requestId);
if (cleanupPromiseAfterExecution)
{
cleanupLogCollectorRequest(requestId);
}
}
return requestId;
}
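// Queries Hoth for the current write offset of the UART channel, using a
// 500 ms timeout on acquiring the command lock. Returns std::nullopt on a
// transport failure or an invalid response.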
std::optional<little_uint32_t> HostCommandImpl::getChannelWriteOffset()
{
if constexpr (kDebug)
{
fmt::println(stderr, "Getting initial offset with status command...");
}
std::vector<uint8_t> offsetRequest =
google::hoth::internal::LogCollectorUtil::generateGetChannelWriteOffsetRequest(uartChannelId);
std::vector<uint8_t> offsetResponse;
try
{
offsetResponse =
sendCommand(offsetRequest, std::chrono::milliseconds(500));
}
catch (const std::exception& e)
{
fmt::println(stderr,
"Failure at sendHostCommand for getting write offset for "
"channelId: {}, with error: {}",
uartChannelId, e.what());
return std::nullopt;
}
if (!google::hoth::internal::LogCollectorUtil::isResponseValid(offsetResponse))
{
        fmt::println(stderr,
                     "Response invalid - unable to grab write offset for UART "
                     "logs - channelId: {}",
                     uartChannelId);
return std::nullopt;
}
struct ec_channel_status_response* channelWriteOffsetResp =
reinterpret_cast<struct ec_channel_status_response*>(
&offsetResponse[sizeof(struct RspHeader)]);
little_uint32_t writeOffset = channelWriteOffsetResp->write_offset;
return writeOffset;
}
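// Reads one chunk of UART log data starting at `offset`, publishes it to the
// per-instance log file, and returns the offset to use for the next read.
// Throws ResponseFailure if Hoth reports an error; if Hoth returns data from
// a different offset than requested, the gap is noted as missed logs in the
// published metadata.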
uint32_t HostCommandImpl::collectUartLogs(uint32_t offset)
{
if constexpr (kDebug)
{
std::string heartbeatData = fmt::format(
"Collecting Uart logs for {} with offset {}", name, offset);
google::hoth::internal::LogCollectorUtil::heartbeat(heartbeatData);
}
std::vector<uint8_t> uartLogsRequest =
google::hoth::internal::LogCollectorUtil::generateCollectUartLogsRequest(uartChannelId, offset);
std::vector<uint8_t> readResponse =
sendCommand(uartLogsRequest, std::chrono::milliseconds(500));
std::span<const uint8_t> output = readResponse;
auto& rsp = stdplus::raw::extractRef<RspHeader>(output);
if (rsp.result != EC_RES_SUCCESS)
{
fmt::println(stderr, "Response invalid: {}",
static_cast<uint8_t>(rsp.result));
throw ResponseFailure();
}
const auto& readResponseBody =
stdplus::raw::copyFrom<ec_channel_read_response>(output);
std::string meta;
if (offset != readResponseBody.offset)
{
meta = fmt::format(
"Missed some logs (Reason: High Verbosity). Offset sent: {}, "
"Offset received: {}",
offset, static_cast<uint32_t>(readResponseBody.offset));
}
uint32_t nextReadOffset =
readResponseBody.offset +
(output.size() - sizeof(ec_channel_read_response));
if constexpr (kDebug)
{
fmt::println(stderr, "Current response length: {}, Next offset: {}",
readResponse.size(), nextReadOffset);
}
// Remove the header and return the response
readResponse.erase(readResponse.begin(),
readResponse.begin() + sizeof(RspHeader) +
sizeof(ec_channel_read_response));
google::hoth::internal::LogCollectorUtil::publishSnapshot(readResponse, PublishType::File, name,
meta);
return nextReadOffset;
}
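// Starts the periodic UART log collector. One synchronous status command
// seeds the read offset from Hoth's write offset (backed off by
// kReadOffsetTweak); if that fails, the scheduler is still armed so the retry
// logic can trigger the collector again later. A trigger request while the
// collector is already running is discarded.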
void HostCommandImpl::collectUartLogsAsync()
{
if (runUartCollector)
{
fmt::println(
stderr,
"Uart log scheduler already running. Discarding trigger request");
return;
}
runUartCollector = true;
std::optional<little_uint32_t> offset =
getChannelWriteOffset(); // one synchronous operation
if (!offset.has_value())
{
fmt::println(stderr, "Unable to trigger UART log collector due to "
"initial offset retrieval failure");
runUartCollector = false;
scheduleUartLogCollector(); // call scheduler for retry logic
}
    else
    {
        // Compute a read offset that is guaranteed to be outside of the Hoth
        // buffer by offsetting the write offset, since there is no way to
        // directly get the read offset.
        uint32_t readOffset =
            static_cast<uint32_t>(offset.value()) - kReadOffsetTweak;
fmt::println(stderr,
"Triggering scheduler with initial offset {} for name: {}",
readOffset, name);
scheduleUartLogCollector(readOffset);
}
}
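// Arms the asio timer to run one UART log collection pass after the
// configured wait time, then reschedules itself with the offset returned by
// that pass (reusing the previous offset if the pass throws). If
// runUartCollector has been cleared, the collector is retriggered up to
// kMaxLogCollectorRetries times before giving up.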
void HostCommandImpl::scheduleUartLogCollector(little_uint32_t offset)
{
timer.expires_from_now(
std::chrono::seconds(logCollectorUtil->getAsyncWaitTime()));
timer.async_wait([this, offset](const boost::system::error_code& error) {
if (!error)
{
if (runUartCollector)
{
uint32_t resultOffset;
try
{
resultOffset = collectUartLogs(offset);
}
catch (const std::exception& e)
{
                    fmt::println(
                        stderr, "Exception occurred while collecting logs: {}",
                        e.what());
                    resultOffset = offset;
}
scheduleUartLogCollector(resultOffset);
}
else
{
                fmt::println(
                    stderr,
                    "UART log collector is not running. Retries left: {}",
                    kMaxLogCollectorRetries - logCollectorRetryCounter);
if (logCollectorRetryCounter < kMaxLogCollectorRetries)
{
                    // Retrigger the log collector, which reschedules with
                    // the async wait time
fmt::println(stderr, "Triggering collector again...");
logCollectorRetryCounter++;
collectUartLogsAsync();
}
else
{
fmt::println(stderr, "Stopping UART log Collector. Max "
"retries exhausted");
runUartCollector = false;
}
}
}
else
{
fmt::println(stderr,
"Failing UART log Collector due to async wait "
"response handler failure {} value {}",
error.message(), error.value());
}
});
}
void HostCommandImpl::stopUartLogs()
{
runUartCollector = false;
}
} // namespace internal
} // namespace hoth
} // namespace google