#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Build a livepatch module

# shellcheck disable=SC1090,SC2155

# Bail out early on bash older than 4.4.  Major and minor are folded into a
# single number (4.4 -> 404) so one comparison covers both.
if (( BASH_VERSINFO[0] * 100 + BASH_VERSINFO[1] < 404 )); then
	echo "error: this script requires bash 4.4+" >&2
	exit 1
fi

# Strict mode: abort on a failing command, let functions inherit the ERR trap,
# fail a pipeline if any stage fails, and treat unset variables as errors.
set -o errexit -o errtrace -o pipefail -o nounset

# Allow doing 'cmd | mapfile -t array' instead of 'mapfile -t array < <(cmd)'.
# This helps keep execution in pipes so pipefail+errexit can catch errors.
shopt -s lastpipe

# Don't inherit these from the environment.
unset DEBUG_CLONE DIFF_CHECKSUM SKIP_CLEANUP XTRACE

# Defaults; process_args() may override them.
REPLACE=1
SHORT_CIRCUIT=0
JOBS="$(getconf _NPROCESSORS_ONLN)"
VERBOSE="-s"

# Remember whether the script was started with xtrace enabled.
if shopt -qo xtrace; then
	XTRACE=1
fi

# Avoid removing the previous $TMP_DIR until args have been fully processed.
KEEP_TMP=1

SCRIPT="$(basename "$0")"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
FIX_PATCH_LINES="$SCRIPT_DIR/fix-patch-lines"

# Source and object trees are both the current directory.
SRC="$(pwd)"
OBJ="$SRC"

CONFIG="$OBJ/.config"
TMP_DIR="$OBJ/klp-tmp"

# Per-step working directories under $TMP_DIR.
ORIG_DIR="$TMP_DIR/orig"
PATCHED_DIR="$TMP_DIR/patched"
DIFF_DIR="$TMP_DIR/diff"
KMOD_DIR="$TMP_DIR/kmod"

STASH_DIR="$TMP_DIR/stash"
TIMESTAMP="$TMP_DIR/timestamp"
PATCH_TMP_DIR="$TMP_DIR/tmp"

KLP_DIFF_LOG="$DIFF_DIR/diff.log"

# grep wrapper that always returns success, so a search with no matches
# doesn't trip errexit/pipefail.
grep0() {
	if ! command grep "$@"; then
		return 0
	fi
}

# Print a progress message (all arguments joined by spaces) to stdout.
status() {
	local msg="$*"

	echo "$msg"
}

# Print an error message, prefixed with the script name, to stderr.
warn() {
	local msg="error: $SCRIPT: $*"

	echo "$msg" >&2
}

# Report an error via warn() and terminate the script with status 1.
die() {
	local -r rc=1

	warn "$@"
	exit "$rc"
}

# $SRC-relative paths of files saved by stash_file(), for restore_files().
declare -a STASHED_FILES

# Save a copy of a source file under $STASH_DIR so it can be restored later.
#
# $1: path to the file (the "$SRC/" prefix, if present, is stripped to get the
#     path used inside the stash).  Dies if the file doesn't exist.
stash_file() {
	local src_path="$1"
	local rel_path="${src_path#"$SRC"/}"

	if [[ ! -e "$src_path" ]]; then
		die "no file to stash: $src_path"
	fi

	mkdir -p "$STASH_DIR/$(dirname "$rel_path")"
	cp -f "$src_path" "$STASH_DIR/$rel_path"

	STASHED_FILES+=("$rel_path")
}

# Move every stashed file back to its place under $SRC.  A file that can't be
# restored only produces a warning; the stash list is emptied afterwards.
restore_files() {
	local rel

	for rel in "${STASHED_FILES[@]}"; do
		if ! mv -f "$STASH_DIR/$rel" "$SRC/$rel"; then
			warn "can't restore file: $rel"
		fi
	done

	STASHED_FILES=()
}

# Exit/signal handler: undo any applied patches, put stashed files back, and
# remove $TMP_DIR unless it is to be kept.  Always returns success.
cleanup() {
	# The arrays used below may not have been set yet.
	set +o nounset

	revert_patches "--recount"
	restore_files

	if [[ "$KEEP_TMP" -eq 0 ]]; then
		rm -rf "$TMP_DIR"
	fi

	return 0
}

# ERR trap handler: report the line number and text of the failing command.
trap_err() {
	warn "line ${BASH_LINENO[0]}: '$BASH_COMMAND'"
}

# Run cleanup on normal exit and on INT/TERM/HUP; report failing commands
# through the ERR trap.
trap cleanup  EXIT INT TERM HUP
trap trap_err ERR

# Print the help text to stdout.  $SCRIPT and $JOBS are expanded into it.
__usage() {
	cat <<EOF
Usage: $SCRIPT [OPTIONS] PATCH_FILE(s)
Generate a livepatch module.

Options:
   -f, --show-first-changed	Show address of first changed instruction
   -j, --jobs=<jobs>		Build jobs to run simultaneously [default: $JOBS]
   -o, --output=<file.ko>	Output file [default: livepatch-<patch-name>.ko]
       --no-replace		Disable livepatch atomic replace
   -v, --verbose		Pass V=1 to kernel/module builds

Advanced Options:
   -d, --debug			Show symbol/reloc cloning decisions
   -S, --short-circuit=STEP	Start at build step (requires prior --keep-tmp)
				   1|orig	Build original kernel (default)
				   2|patched	Build patched kernel
				   3|diff	Diff objects
				   4|kmod	Build patch module
   -T, --keep-tmp		Preserve tmp dir on exit

EOF
}

# Print the help text to stderr.
usage() {
	__usage 1>&2
}

# Parse the command line into the script's global settings.
#
# Depending on the options given this sets DIFF_CHECKSUM, JOBS, OUTFILE, NAME,
# REPLACE, VERBOSE, DEBUG_CLONE and SHORT_CIRCUIT.  On success it always sets
# KEEP_TMP and PATCHES (the remaining non-option arguments).
#
# Exits with status 1 on invalid options, an invalid option value, or when no
# patch file is given.  --help prints the usage and exits with status 0.
process_args() {
	local keep_tmp=0
	local short
	local long
	local args

	short="hfj:o:vdS:T"
	long="help,show-first-changed,jobs:,output:,no-replace,verbose,debug,short-circuit:,keep-tmp"

	# getopt has already printed its own diagnostic.  Exit with an explicit
	# failure status: a bare 'exit' would propagate usage's status of 0.
	args=$(getopt --options "$short" --longoptions "$long" -- "$@") || {
		echo >&2
		usage
		exit 1
	}
	eval set -- "$args"

	while true; do
		case "$1" in
			-h | --help)
				usage
				exit 0
				;;
			-f | --show-first-changed)
				DIFF_CHECKSUM=1
				shift
				;;
			-j | --jobs)
				JOBS="$2"
				shift 2
				;;
			-o | --output)
				[[ "$2" != *.ko ]] && die "output filename should end with .ko"
				OUTFILE="$2"
				# The module name is the sanitized file name
				# without directory and .ko suffix.
				NAME="$(basename "$OUTFILE")"
				NAME="${NAME%.ko}"
				NAME="$(module_name_string "$NAME")"
				shift 2
				;;
			--no-replace)
				REPLACE=0
				shift
				;;
			-v | --verbose)
				VERBOSE="V=1"
				shift
				;;
			-d | --debug)
				DEBUG_CLONE=1
				keep_tmp=1
				shift
				;;
			-S | --short-circuit)
				[[ ! -d "$TMP_DIR" ]] && die "--short-circuit requires preserved klp-tmp dir"
				keep_tmp=1
				case "$2" in
					1 | orig)
						SHORT_CIRCUIT=1
						;;
					2 | patched)
						SHORT_CIRCUIT=2
						;;
					3 | diff)
						SHORT_CIRCUIT=3
						;;
					# 'kmod' is the name shown in the help
					# text; 'mod' is kept for compatibility.
					4 | kmod | mod)
						SHORT_CIRCUIT=4
						;;
					*)
						die "invalid short-circuit step '$2'"
						;;
				esac
				shift 2
				;;
			-T | --keep-tmp)
				keep_tmp=1
				shift
				;;
			--)
				shift
				break
				;;
			*)
				usage
				exit 1
				;;
		esac
	done

	# At least one patch file is required.
	if [[ $# -eq 0 ]]; then
		usage
		exit 1
	fi

	# Only now replace the global KEEP_TMP value, so that an early exit
	# above leaves it untouched.
	KEEP_TMP="$keep_tmp"
	PATCHES=("$@")
}

# Temporarily disable xtrace for especially verbose code.  Only has an effect
# when the script was started with xtrace on (XTRACE set).  Any arguments are
# ignored; call sites pass a short description of what is being silenced.
xtrace_save() {
	if [[ -v XTRACE ]]; then
		set +x
	fi

	return 0
}

# Turn xtrace back on, but only if the script was started with it (XTRACE set).
xtrace_restore() {
	if [[ -v XTRACE ]]; then
		set -x
	fi

	return 0
}

# Source the kernel .config and make sure it is usable for building a
# livepatch: required options must be set, unsupported ones must not be.
# Dies with a message on the first violation.
validate_config() {
	local opt

	xtrace_save "reading .config"
	if ! source "$CONFIG"; then
		die "no .config file in $(dirname "$CONFIG")"
	fi
	xtrace_restore

	# Options that must be enabled
	for opt in CONFIG_LIVEPATCH CONFIG_KLP_BUILD; do
		if [[ ! -v "$opt" ]]; then
			die "$opt not enabled"
		fi
	done

	# Options that must not be enabled
	for opt in CONFIG_GCC_PLUGIN_LATENT_ENTROPY CONFIG_GCC_PLUGIN_RANDSTRUCT; do
		if [[ -v "$opt" ]]; then
			die "kernel option '$opt' not supported"
		fi
	done

	# With the LLVM assembler, a minimum version is required
	if [[ -v CONFIG_AS_IS_LLVM ]]; then
		if [[ "$CONFIG_AS_VERSION" -lt 200000 ]]; then
			die "Clang assembler version < 20 not supported"
		fi
	fi

	return 0
}

# Turn a string into a valid module name and print it.
#
# Only alphanumerics, '_' and '-' are kept; every other character becomes '-'.
# The result is cut to 55 chars so the full name + NUL terminator fits in the
# kernel's 56-byte module name array.
module_name_string() {
	local sanitized="${1//[^a-zA-Z0-9_-]/-}"

	echo "${sanitized:0:55}"
}

# Pick a default module name and output file unless --output already set NAME.
#
# A single patch gives "livepatch-<patch file name without extension>";
# several patches give "livepatch-patch".  The name is sanitized with
# module_name_string() and OUTFILE becomes "<name>.ko".
set_module_name() {
	local stem

	if [[ -v NAME ]]; then
		return 0
	fi

	case "${#PATCHES[@]}" in
		1)
			stem="$(basename "${PATCHES[0]}")"
			stem="${stem%.*}"
			;;
		*)
			stem="patch"
			;;
	esac

	NAME="$(module_name_string "livepatch-$stem")"
	OUTFILE="$NAME.ko"
}

# Hardcode the value printed by the localversion script to prevent patch
# application from appending it with '+' due to a dirty git working tree.
#
# The original script is stashed first, so restore_files() can put it back.
# Dies if the kernel release string can't be determined.
set_kernelversion() {
	local file="$SRC/scripts/setlocalversion"
	local kernelrelease

	stash_file "$file"

	kernelrelease="$(cd "$SRC" && make syncconfig &>/dev/null && make -s kernelrelease)"
	if [[ -z "$kernelrelease" ]]; then
		die "failed to get kernel version"
	fi

	# Insert an early "echo <release>; exit 0" as line 2.  Edit the very
	# file that was stashed rather than a path relative to the current
	# directory.
	sed -i "2i echo $kernelrelease; exit 0" "$file"
}

# Print the sorted, de-duplicated list of file paths named on the '---' and
# '+++' lines of a patch, with the leading path component (up to and including
# the first '/') removed.
#
# $1: patch file
get_patch_files() {
	local patch_file="$1"

	grep0 -E '^(--- |\+\+\+ )' "$patch_file" |
		gawk '{ print $2 }' |
		sed 's|^[^/]*/||' |
		sort -u
}

# Make sure git re-stats the files touched by a patch.  Does nothing when $SRC
# is not a git checkout.
#
# $1: patch file
git_refresh() {
	local patch="$1"
	local files=()

	if [[ ! -e "$SRC/.git" ]]; then
		return 0
	fi

	get_patch_files "$patch" | mapfile -t files

	(
		cd "$SRC"
		git update-index -q --refresh -- "${files[@]}"
	)
}

# Die if any patch in PATCHES touches a file that can't be livepatched by this
# script: anything under lib/ or any .S file.
check_unsupported_patches() {
	local patch
	# Keep the inner loop variable local too, so it can't clobber a
	# caller's or global 'file'.
	local file

	for patch in "${PATCHES[@]}"; do
		local files=()

		get_patch_files "$patch" | mapfile -t files

		for file in "${files[@]}"; do
			case "$file" in
				lib/*|*.S)
					die "unsupported patch to $file"
					;;
			esac
		done
	done
}

# Apply a patch to $SRC with 'git apply' and record it in APPLIED_PATCHES.
#
# $1:  patch file (dies if it doesn't exist)
# $2+: extra arguments passed to 'git apply'
apply_patch() {
	local patch="$1"
	local extra_args=("${@:2}")

	if [[ ! -f "$patch" ]]; then
		die "$patch doesn't exist"
	fi

	(
		cd "$SRC"

		# The sed strips the version signature from 'git format-patch',
		# otherwise 'git apply --recount' warns.
		sed -n '/^-- /q;p' "$patch" | git apply "${extra_args[@]}"
	)

	APPLIED_PATCHES+=("$patch")
}

# Reverse-apply a patch in $SRC, refresh git's view of the touched files, and
# drop the patch from APPLIED_PATCHES.
#
# $1:  patch file
# $2+: extra arguments passed to 'git apply --reverse'
revert_patch() {
	local patch="$1"
	shift
	local extra_args=("$@")
	local tmp=()
	# Keep the loop variable local so it can't clobber a caller's or
	# global 'p'.
	local p

	(
		cd "$SRC"

		# Same signature stripping as when the patch was applied
		sed -n '/^-- /q;p' "$patch" |
			git apply --reverse "${extra_args[@]}"
	)
	git_refresh "$patch"

	# Rebuild the list without the reverted patch
	for p in "${APPLIED_PATCHES[@]}"; do
		[[ "$p" == "$patch" ]] && continue
		tmp+=("$p")
	done

	APPLIED_PATCHES=("${tmp[@]}")
}

# Apply every patch in PATCHES, in order.
apply_patches() {
	local idx

	for idx in "${!PATCHES[@]}"; do
		apply_patch "${PATCHES[$idx]}"
	done
}

# Revert all currently applied patches, last applied first, then clear
# APPLIED_PATCHES.
#
# $@: extra arguments passed through to revert_patch()
revert_patches() {
	local extra_args=("$@")
	# Iterate over a snapshot, since revert_patch() edits APPLIED_PATCHES.
	local patches=("${APPLIED_PATCHES[@]}")
	# Keep the loop counter local so it can't clobber a caller's or
	# global 'i'.
	local i

	for (( i = ${#patches[@]} - 1; i >= 0; i-- )); do
		revert_patch "${patches[$i]}" "${extra_args[@]}"
	done

	APPLIED_PATCHES=()
}

# Sanity-check the patches before starting any build: reject patches to
# unsupported files, then make sure all patches apply, and revert them again.
validate_patches() {
	check_unsupported_patches
	apply_patches
	revert_patches
}

# One-time setup: check the working directory, prepare $TMP_DIR, validate the
# kernel config and settle the module name and kernel version.
do_init() {
	local kernel_root="$SCRIPT_DIR/../.."
	local dir

	# We're not yet smart enough to handle anything other than in-tree
	# builds in pwd.
	for dir in "$SRC" "$OBJ"; do
		if [[ ! "$dir" -ef "$kernel_root" ]]; then
			die "please run from the kernel root directory"
		fi
	done

	# A full run starts with a fresh tmp dir; a short-circuited run reuses
	# the existing one.
	if (( SHORT_CIRCUIT <= 1 )); then
		rm -rf "$TMP_DIR"
	fi
	mkdir -p "$TMP_DIR"

	APPLIED_PATCHES=()

	if [[ ! -x "$FIX_PATCH_LINES" ]]; then
		die "can't find fix-patch-lines"
	fi

	validate_config
	set_module_name
	set_kernelversion
}

# Refresh the patch hunk headers, specifically the line numbers and counts.
#
# The patch is regenerated in place: the affected files are copied before and
# after applying the patch, and the two copies are diffed.
#
# $1: patch file, overwritten with the regenerated patch
refresh_patch() {
	local patch="$1"
	local tmpdir="$PATCH_TMP_DIR"
	local files=()

	rm -rf "$tmpdir"
	mkdir -p "$tmpdir/a"
	mkdir -p "$tmpdir/b"

	# Get all source files affected by the patch
	get_patch_files "$patch" | mapfile -t files

	# Copy orig source files to 'a'.  The array is handed to cp directly so
	# file names are never re-split or re-parsed on the way.
	( cd "$SRC" && cp --parents --target-directory="$tmpdir/a" -- "${files[@]}" )

	# Copy patched source files to 'b'
	apply_patch "$patch" --recount
	( cd "$SRC" && cp --parents --target-directory="$tmpdir/b" -- "${files[@]}" )
	revert_patch "$patch" --recount

	# Diff 'a' and 'b' to make a clean patch.  The exit status is ignored
	# on purpose.
	( cd "$tmpdir" && git diff --no-index --no-prefix a b > "$patch" ) || true
}

# Copy the patches to a temporary directory, fix their lines so as not to
# affect the __LINE__ macro for otherwise unchanged functions further down the
# file, and update $PATCHES to point to the fixed patches.
#
# Fixed patches are named "$TMP_DIR/NNNN-fixed-<original name>", with NNNN
# counting up from 0001.
fix_patches() {
	local num=1
	local i

	# Drop fixed patches left over from a previous run
	rm -f "$TMP_DIR"/*.patch

	for i in "${!PATCHES[@]}"; do
		local src_patch="${PATCHES[$i]}"
		local scratch="$TMP_DIR/tmp.patch"
		local fixed

		printf -v fixed '%s/%04d-fixed-%s' "$TMP_DIR" "$num" "$(basename "$src_patch")"

		# Refresh a scratch copy, fix its lines, then refresh the result
		cp -f "$src_patch" "$scratch"
		refresh_patch "$scratch"
		"$FIX_PATCH_LINES" "$scratch" > "$fixed"
		refresh_patch "$fixed"

		PATCHES[i]="$fixed"

		rm -f "$scratch"
		num=$(( num + 1 ))
	done
}

# Run 'make clean' in $SRC, silently, with $JOBS parallel jobs.
clean_kernel() {
	local cmd=(make --silent "-j$JOBS" clean)

	(
		cd "$SRC"
		"${cmd[@]}"
	)
}

# Build vmlinux and modules in $SRC with the flags needed for diffing objects
# later.  All build output is appended to $TMP_DIR/build.log as well as being
# shown to the user.
build_kernel() {
	local log="$TMP_DIR/build.log"
	local objtool_args=("--checksum")
	local cmd

	# When a patch to a kernel module references a newly created unexported
	# symbol which lives in vmlinux or another kernel module, the patched
	# kernel build fails with the following error:
	#
	#   ERROR: modpost: "klp_string" [fs/xfs/xfs.ko] undefined!
	#
	# The undefined symbols are working as designed in that case.  They get
	# resolved later when the livepatch module build link pulls all the
	# disparate objects together into the same kernel module.
	#
	# It would be good to have a way to tell modpost to skip checking for
	# undefined symbols altogether.  For now, just convert the error to a
	# warning with KBUILD_MODPOST_WARN, and grep out the warning to avoid
	# confusing the user.
	#
	cmd=(
		make
		"KBUILD_MODPOST_WARN=1"
		"$VERBOSE"
		"-j$JOBS"
		"KCFLAGS=-ffunction-sections -fdata-sections"
		"OBJTOOL_ARGS=${objtool_args[*]}"
		vmlinux
		modules
	)

	(
		cd "$SRC"
		"${cmd[@]}"							\
			1> >(tee -a "$log")					\
			2> >(tee -a "$log" | grep0 -v "modpost.*undefined!" >&2)
	)
}

# Print the $OBJ-relative paths of the root-level vmlinux.o and of all .ko
# files below the root level, one per line.
#
# $@: extra find tests applied to the candidates (e.g. -newer <file>)
find_objects() {
	local opts=("$@")

	# Find root-level vmlinux.o and non-root-level .ko files,
	# excluding klp-tmp/ and .git/
	#
	# The first group is pruned: $TMP_DIR, $OBJ/.git, and .ko files
	# directly in $OBJ (the regex only matches paths with no further '/').
	# '%P' prints the path with the "$OBJ/" starting point removed.
	find "$OBJ" \( -path "$TMP_DIR" -o -path "$OBJ/.git" -o	-regex "$OBJ/[^/][^/]*\.ko" \) -prune -o \
		    -type f "${opts[@]}"				\
		    \( -name "*.ko" -o -path "$OBJ/vmlinux.o" \)	\
		    -printf '%P\n'
}

# Copy all .o archives to $ORIG_DIR
#
# For every object reported by find_objects() (vmlinux.o, or the .o belonging
# to each .ko), the .o file is copied to the same relative path under
# $ORIG_DIR.  The build log is moved there too, and $TIMESTAMP is touched so
# that objects rebuilt later can be told apart.
copy_orig_objects() {
	local files=()
	local _file

	rm -rf "$ORIG_DIR"
	mkdir -p "$ORIG_DIR"

	find_objects | mapfile -t files

	xtrace_save "copying orig objects"
	for _file in "${files[@]}"; do
		# Swap only a trailing ".ko" for ".o"; a ".ko" earlier in the
		# path (e.g. in a directory name) must be left alone.
		local rel_file="${_file/%.ko/.o}"
		local file="$OBJ/$rel_file"
		local orig_file="$ORIG_DIR/$rel_file"
		local orig_dir="$(dirname "$orig_file")"

		[[ ! -f "$file" ]] && die "missing $(basename "$file") for $_file"

		mkdir -p "$orig_dir"
		cp -f "$file" "$orig_dir"
	done
	xtrace_restore

	mv -f "$TMP_DIR/build.log" "$ORIG_DIR"
	touch "$TIMESTAMP"
}

# Copy all changed objects to $PATCHED_DIR
copy_patched_objects() {
	local files=()
	local opts=()
	local found=0

	rm -rf "$PATCHED_DIR"
	mkdir -p "$PATCHED_DIR"

	# Note this doesn't work with some configs, thus the 'cmp' below.
	opts=("-newer")
	opts+=("$TIMESTAMP")

	find_objects "${opts[@]}" | mapfile -t files

	xtrace_save "copying changed objects"
	for _file in "${files[@]}"; do
		local rel_file="${_file/.ko/.o}"
		local file="$OBJ/$rel_file"
		local orig_file="$ORIG_DIR/$rel_file"
		local patched_file="$PATCHED_DIR/$rel_file"
		local patched_dir="$(dirname "$patched_file")"

		[[ ! -f "$file" ]] && die "missing $(basename "$file") for $_file"

		cmp -s "$orig_file" "$file" && continue

		mkdir -p "$patched_dir"
		cp -f "$file" "$patched_dir"
		found=1
	done
	xtrace_restore

	(( found == 0 )) && die "no changes detected"

	mv -f "$TMP_DIR/build.log" "$PATCHED_DIR"
}

# Diff changed objects, writing output object to $DIFF_DIR
#
# Runs 'objtool klp diff <orig> <patched> <out>' from within $ORIG_DIR for
# every .o file found under $PATCHED_DIR.  objtool's output is shown and also
# appended to $KLP_DIFF_LOG.  Dies if there is nothing to diff or if objtool
# fails.
diff_objects() {
	local log="$KLP_DIFF_LOG"
	local files=()
	local opts=()
	# Keep the loop variable local so it can't clobber a caller's or
	# global 'file'.
	local file

	rm -rf "$DIFF_DIR"
	mkdir -p "$DIFF_DIR"

	find "$PATCHED_DIR" -type f -name "*.o" | mapfile -t files
	[[ ${#files[@]} -eq 0 ]] && die "no changes detected"

	[[ -v DEBUG_CLONE ]] && opts=("--debug")

	# Diff all changed objects
	for file in "${files[@]}"; do
		local rel_file="${file#"$PATCHED_DIR"/}"
		# Relative on purpose: objtool runs with $ORIG_DIR as cwd
		local orig_file="$rel_file"
		local patched_file="$PATCHED_DIR/$rel_file"
		local out_file="$DIFF_DIR/$rel_file"
		local filter=()
		local cmd=()

		mkdir -p "$(dirname "$out_file")"

		cmd=("$SRC/tools/objtool/objtool")
		cmd+=("klp")
		cmd+=("diff")
		(( ${#opts[@]} > 0 )) && cmd+=("${opts[@]}")
		cmd+=("$orig_file")
		cmd+=("$patched_file")
		cmd+=("$out_file")

		# With --show-first-changed, per-instruction checksum debug
		# lines are kept in the log but hidden from the terminal.
		if [[ -v DIFF_CHECKSUM ]]; then
			filter=("grep0")
			filter+=("-Ev")
			filter+=("DEBUG: .*checksum: ")
		else
			filter=("cat")
		fi

		(
			cd "$ORIG_DIR"
			"${cmd[@]}"							\
				1> >(tee -a "$log")					\
				2> >(tee -a "$log" | "${filter[@]}" >&2) ||		\
				die "objtool klp diff failed"
		)
	done
}

# For each changed object, run objtool with --debug-checksum to get the
# per-instruction checksums, and then diff those to find the first changed
# instruction for each function.
#
# The changed functions are taken from the "changed function" lines in
# $KLP_DIFF_LOG.  One result line is printed per changed function.
diff_checksums() {
	local orig_log="$ORIG_DIR/checksum.log"
	local patched_log="$PATCHED_DIR/checksum.log"
	local -A funcs
	local cmd=()
	# Filled by mapfile below; declared local so it doesn't leak out as a
	# global array.
	local lines=()
	local line
	local file
	local func

	# Extract "<object> <function>" pairs from the diff log
	gawk '/\.o: changed function: / {
		sub(/:$/, "", $1)
		print $1, $NF
	}' "$KLP_DIFF_LOG" | mapfile -t lines

	# Group the function names by object (space-separated)
	for line in "${lines[@]}"; do
		read -r file func <<< "$line"
		if [[ ! -v funcs["$file"] ]]; then
			funcs["$file"]="$func"
		else
			funcs["$file"]+=" $func"
		fi
	done

	cmd=("$SRC/tools/objtool/objtool")
	cmd+=("--checksum")
	cmd+=("--link")
	cmd+=("--dry-run")

	for file in "${!funcs[@]}"; do
		# objtool takes the function list comma-separated
		local opt="--debug-checksum=${funcs[$file]// /,}"

		# Same object, once in the original and once in the patched tree
		(
			cd "$ORIG_DIR"
			"${cmd[@]}" "$opt" "$file" &> "$orig_log" || \
				( cat "$orig_log" >&2; die "objtool --debug-checksum failed" )

			cd "$PATCHED_DIR"
			"${cmd[@]}" "$opt" "$file" &> "$patched_log" ||	\
				( cat "$patched_log" >&2; die "objtool --debug-checksum failed" )
		)

		# Report the first differing checksum line of each function;
		# gawk exits after the first hit.
		for func in ${funcs[$file]}; do
			diff <( grep0 -E "^DEBUG: .*checksum: $func " "$orig_log"    | sed "s|$ORIG_DIR/||")	\
			     <( grep0 -E "^DEBUG: .*checksum: $func " "$patched_log" | sed "s|$PATCHED_DIR/||")	\
				| gawk '/^< DEBUG: / {
					gsub(/:/, "")
					printf "%s: %s: %s\n", $3, $5, $6
					exit
			}' || true
		done
	done
}

# Build and post-process livepatch module in $KMOD_DIR
#
# Generates a Kbuild file that links init.c with every diffed object from
# $DIFF_DIR, builds it as an external module against $SRC, post-processes the
# resulting .ko and copies it to $OUTFILE.
build_patch_module() {
	local makefile="$KMOD_DIR/Kbuild"
	local log="$KMOD_DIR/build.log"
	local kmod_file
	local cflags=()
	local files=()
	local cmd=()
	# Keep the loop variable local so it can't clobber a caller's or
	# global 'file'.
	local file

	rm -rf "$KMOD_DIR"
	mkdir -p "$KMOD_DIR"

	cp -f "$SRC/scripts/livepatch/init.c" "$KMOD_DIR"

	echo "obj-m := $NAME.o" > "$makefile"
	echo -n "$NAME-y := init.o" >> "$makefile"

	find "$DIFF_DIR" -type f -name "*.o" | mapfile -t files
	[[ ${#files[@]} -eq 0 ]] && die "no changes detected"

	for file in "${files[@]}"; do
		local rel_file="${file#"$DIFF_DIR"/}"
		local kmod_dir
		local cmd_file

		kmod_file="$KMOD_DIR/$rel_file"
		kmod_dir="$(dirname "$kmod_file")"
		cmd_file="$kmod_dir/.$(basename "$file").cmd"

		mkdir -p "$kmod_dir"
		cp -f "$file" "$kmod_dir"

		# Tell kbuild this is a prebuilt object
		cp -f "$file" "${kmod_file}_shipped"

		# Make modpost happy
		touch "$cmd_file"

		echo -n " $rel_file" >> "$makefile"
	done

	# Terminate the "$NAME-y := ..." line
	echo >> "$makefile"

	cflags=("-ffunction-sections")
	cflags+=("-fdata-sections")
	[[ $REPLACE -eq 0 ]] && cflags+=("-DKLP_NO_REPLACE")

	cmd=("make")
	cmd+=("$VERBOSE")
	cmd+=("-j$JOBS")
	cmd+=("--directory=.")
	cmd+=("M=$KMOD_DIR")
	cmd+=("KCFLAGS=${cflags[*]}")

	# Build a "normal" kernel module with init.c and the diffed objects
	(
		cd "$SRC"
		"${cmd[@]}"							\
			1> >(tee -a "$log")					\
			2> >(tee -a "$log" >&2)
	)

	kmod_file="$KMOD_DIR/$NAME.ko"

	# Save off the intermediate binary for debugging
	cp -f "$kmod_file" "$kmod_file.orig"

	# Work around issue where slight .config change makes corrupt BTF
	objcopy --remove-section=.BTF "$kmod_file"

	# Fix (and work around) linker wreckage for klp syms / relocs
	"$SRC/tools/objtool/objtool" klp post-link "$kmod_file" || die "objtool klp post-link failed"

	cp -f "$kmod_file" "$OUTFILE"
}


################################################################################

# Main sequence.  SHORT_CIRCUIT selects the first step to run; every step from
# that one on is executed.

process_args "$@"
do_init

# Step 1: validate the patches and build the unpatched kernel
if (( SHORT_CIRCUIT <= 1 )); then
	status "Validating patch(es)"
	validate_patches
	status "Building original kernel"
	clean_kernel
	build_kernel
	status "Copying original object files"
	copy_orig_objects
fi

# Step 2: build the kernel with the (fixed) patches applied, then revert them
if (( SHORT_CIRCUIT <= 2 )); then
	status "Fixing patch(es)"
	fix_patches
	apply_patches
	status "Building patched kernel"
	build_kernel
	revert_patches
	status "Copying patched object files"
	copy_patched_objects
fi

# Step 3: diff the original and patched objects
if (( SHORT_CIRCUIT <= 3 )); then
	status "Diffing objects"
	diff_objects
	if [[ -v DIFF_CHECKSUM ]]; then
		status "Finding first changed instructions"
		diff_checksums
	fi
fi

# Step 4: build the livepatch module from the diffed objects
if (( SHORT_CIRCUIT <= 4 )); then
	status "Building patch module: $OUTFILE"
	build_patch_module
fi

status "SUCCESS"
