| #!/usr/bin/env python3 |
| # pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302 |
| # Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. |
| # SPDX-License-Identifier: GPL-2.0 |
| |
| """ |
| Parse ABI documentation and produce results from it. |
| """ |
| |
| from argparse import Namespace |
| import logging |
| import os |
| import re |
| |
| from pprint import pformat |
| from random import randrange, seed |
| |
| # Import Python modules |
| |
| from helpers import AbiDebug, ABI_DIR |
| |
| |
class AbiParser:
    """
    Parse the files under an ABI documentation directory.

    Each file is fed line by line to a small state machine. The results
    are kept in four tables:

    - ``data``: maps a generated ReST label to the parsed entry
      (``what``, ``file``, ``path``, ``line_no`` plus one item per tag);
    - ``what_symbols``: maps each ``What:`` string to the files/lines
      declaring it and, when known, to the label of its entry;
    - ``file_refs``: maps a file name to its ReST label;
    - ``what_refs``: maps each ``What:`` string to the label of its entry.
    """

    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"

    def __init__(self, directory, logger=None,
                 enable_lineno=False, show_warnings=True, debug=0):
        """
        Store the arguments and initialize the parser tables.

        ``directory`` is the default root used by parse_abi().
        ``logger`` is the logging object used for all messages; when not
        given, the "get_abi" logger is used. ``show_warnings`` enables
        the parse warnings and ``debug`` is tested against AbiDebug
        flags. ``enable_lineno`` is stored, but not used by this class.
        """

        self.directory = directory
        self.enable_lineno = enable_lineno
        self.show_warnings = show_warnings
        self.debug = debug

        self.log = logger if logger else logging.getLogger("get_abi")

        self.data = {}
        self.what_symbols = {}
        self.file_refs = {}
        self.what_refs = {}

        # Ignore files that contain such suffixes
        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

        # Regular expressions used on parser
        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
        self.re_valid = re.compile(self.TAGS)
        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
        self.re_whitespace = re.compile(r"^\s+")

        # Regular expressions used on print
        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
        self.re_xref_node = re.compile(self.XREF)

    def warn(self, fdata, msg, extra=None):
        """
        Log a parse warning, unless warnings are disabled.

        The message is prefixed with the file name and line number taken
        from ``fdata``. ``extra``, when given, is appended on a new line.
        """

        if not self.show_warnings:
            return

        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
        if extra:
            msg += "\n\t\t" + extra

        self.log.warning(msg)

    def add_symbol(self, what, fname, ln=None, xref=None):
        """
        Record at ``what_symbols`` where a 'what' is located.

        ``ln`` is added to the list of lines for ``fname`` when it is
        set and not there yet. ``xref``, when set, replaces the label
        stored for the symbol.
        """

        entry = self.what_symbols.setdefault(what, {"file": {}})
        lines = entry["file"].setdefault(fname, [])

        if ln and ln not in lines:
            lines.append(ln)

        if xref:
            entry["xref"] = xref

    def _parse_line(self, fdata, line):
        """
        Parse a single line of an ABI file.

        ``fdata`` holds the per-file parser state (current tag, current
        key, list of 'what' names, description indentation, ...). It is
        updated in place, and the parsed content is stored at
        ``self.data``.
        """

        new_what = False
        new_tag = False
        content = None

        match = self.re_tag.match(line)
        if match:
            new = match.group(1).lower()
            sep = match.group(2)
            content = match.group(3)

            match = self.re_valid.search(new)
            if match:
                new_tag = match.group(1)
            else:
                if fdata.tag == "description":
                    # New "tag" is actually part of description.
                    # Don't consider it a tag
                    new_tag = False
                elif fdata.tag != "":
                    # Report the unknown tag that was just read, and not
                    # the (valid) tag that is currently being parsed
                    self.warn(fdata, f"tag '{new}' is invalid", line)

        if new_tag:
            # "where" is Invalid, but was a common mistake. Warn if found
            if new_tag == "where":
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                new_tag = "what"

            if new_tag == "what":
                fdata.space = None

                if content not in self.what_symbols:
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                if fdata.tag == "what":
                    # Consecutive "What:" lines belong to the same entry
                    fdata.what.append(content.strip("\n"))
                else:
                    # A new entry starts here: finish the previous one
                    if fdata.key:
                        if "description" not in self.data.get(fdata.key, {}):
                            self.warn(fdata, f"{fdata.key} doesn't have a description")

                        for w in fdata.what:
                            self.add_symbol(what=w, fname=fdata.fname,
                                            ln=fdata.what_ln, xref=fdata.key)

                    fdata.label = content
                    new_what = True

                    key = "abi_" + content.lower()
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")

                    # Avoid duplicated keys but using a defined seed, to make
                    # the namespace identical if there aren't changes at the
                    # ABI symbols
                    seed(42)

                    while fdata.key in self.data:
                        char = randrange(0, 51) + ord("A")
                        if char > ord("Z"):
                            char += ord("a") - ord("Z") - 1

                        fdata.key += chr(char)

                    if fdata.key and fdata.key not in self.data:
                        self.data[fdata.key] = {
                            "what": [content],
                            "file": [fdata.file_ref],
                            "path": fdata.ftype,
                            "line_no": fdata.ln,
                        }

                    # From now on, fdata.what is the list stored at the entry
                    fdata.what = self.data[fdata.key]["what"]

                self.what_refs[content] = fdata.key
                fdata.tag = new_tag
                fdata.what_ln = fdata.ln

                if fdata.nametag["what"]:
                    t = (content, fdata.key)
                    if t not in fdata.nametag["symbols"]:
                        fdata.nametag["symbols"].append(t)

                return

            if fdata.tag and new_tag:
                fdata.tag = new_tag

                # NOTE(review): new_what is only set on the "what" branch
                # above, which always returns. So, as written, this block
                # is not executed.
                if new_what:
                    fdata.label = ""

                    if "description" in self.data[fdata.key]:
                        self.data[fdata.key]["description"] += "\n\n"

                    if fdata.file_ref not in self.data[fdata.key]["file"]:
                        self.data[fdata.key]["file"].append(fdata.file_ref)

                    # Debug values are flags: test them like the other
                    # debug checks of this class do
                    if self.debug & AbiDebug.WHAT_PARSING:
                        self.log.debug("what: %s", fdata.what)

                if not fdata.what:
                    self.warn(fdata, "'What:' should come first:", line)
                    return

                if new_tag == "description":
                    fdata.space = None

                    if content:
                        # Replace "tag:" by spaces, in order to get the
                        # column where the description text starts
                        sep = sep.replace(":", " ")

                        c = " " * len(new_tag) + sep + content
                        c = c.expandtabs()

                        match = self.re_start_spc.match(c)
                        if match:
                            # Preserve initial spaces for the first line
                            fdata.space = match.group(1)
                            content = match.group(2) + "\n"

                self.data[fdata.key][fdata.tag] = content

            return

        # Store any contents before tags at the database
        if not fdata.tag and "what" in fdata.nametag:
            fdata.nametag["description"] += line
            return

        if fdata.tag == "description":
            content = line.expandtabs()

            # Blank line inside a description
            if self.re_whitespace.sub("", content) == "":
                self.data[fdata.key][fdata.tag] += "\n"
                return

            if fdata.space is None:
                match = self.re_start_spc.match(content)
                if match:
                    # Preserve initial spaces for the first line
                    fdata.space = match.group(1)

                    content = match.group(2) + "\n"
            else:
                # Drop the common indentation. If the line doesn't have
                # it, stop removing indentation from the next lines
                if content.startswith(fdata.space):
                    content = content[len(fdata.space):]

                else:
                    fdata.space = ""

            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += content
            return

        # Continuation line for the other tags
        content = line.strip()
        if fdata.tag:
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
            return

        # Everything else is error
        if content:
            self.warn(fdata, "Unexpected content", line)

    def parse_readme(self, nametag, fname):
        """
        Parse ABI README file.

        The whole file is appended to ``nametag["description"]``. Lines
        starting with a valid tag are prefixed with a new line and a
        colon.
        """

        nametag["what"] = ["Introduction"]
        nametag["path"] = "README"
        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                match = self.re_tag.match(line)
                if match and self.re_valid.search(match.group(1).lower()):
                    nametag["description"] += "\n:" + line
                    continue

                nametag["description"] += line

    def parse_file(self, fname, path, basename):
        """
        Parse a single file.

        ``fname`` is the file to open; ``path`` and ``basename`` are
        used to build the ReST label and the name of the file entry
        that is added to ``self.data``.
        """

        ref = f"abi_file_{path}_{basename}"
        ref = self.re_unprintable.sub("_", ref).strip("_")

        # Store per-file state into a namespace variable. This will be used
        # by the per-line parser state machine and by the warning function.
        # It has to be an instance: setting attributes on the Namespace
        # class itself would share the state with every other user of it.
        fdata = Namespace()

        fdata.fname = fname
        fdata.name = basename

        pos = fname.find(ABI_DIR)
        if pos > 0:
            f = fname[pos:]
        else:
            f = fname

        fdata.file_ref = (f, ref)
        self.file_refs[f] = ref

        fdata.ln = 0
        fdata.what_ln = 0
        fdata.tag = ""
        fdata.label = ""
        fdata.what = []
        fdata.key = None
        fdata.xrefs = None
        fdata.space = None
        fdata.ftype = path.split("/")[0]

        fdata.nametag = {}
        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
        fdata.nametag["type"] = "File"
        fdata.nametag["path"] = fdata.ftype
        fdata.nametag["file"] = [fdata.file_ref]
        fdata.nametag["line_no"] = 1
        fdata.nametag["description"] = ""
        fdata.nametag["symbols"] = []

        self.data[ref] = fdata.nametag

        if self.debug & AbiDebug.WHAT_OPEN:
            self.log.debug("Opening file %s", fname)

        if basename == "README":
            self.parse_readme(fdata.nametag, fname)
            return

        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                fdata.ln += 1

                self._parse_line(fdata, line)

        if "description" in fdata.nametag:
            fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

        # Finish the last entry of the file
        if fdata.key:
            if "description" not in self.data.get(fdata.key, {}):
                self.warn(fdata, f"{fdata.key} doesn't have a description")

            for w in fdata.what:
                self.add_symbol(what=w, fname=fname, xref=fdata.key)

    def _parse_abi(self, root=None):
        """
        Internal function to parse documentation ABI recursively.

        Hidden files and files ending with one of ``ignore_suffixes``
        are skipped. When ``root`` is not set, ``self.directory`` is used.
        """

        if not root:
            root = self.directory

        with os.scandir(root) as obj:
            for entry in obj:
                name = os.path.join(root, entry.name)

                if entry.is_dir():
                    self._parse_abi(name)
                    continue

                if not entry.is_file():
                    continue

                basename = os.path.basename(name)

                if basename.startswith("."):
                    continue

                if basename.endswith(self.ignore_suffixes):
                    continue

                path = self.re_abi_dir.sub("", os.path.dirname(name))

                self.parse_file(name, path, basename)

    def parse_abi(self, root=None):
        """Parse documentation ABI, optionally dumping the parsed data"""

        self._parse_abi(root)

        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
            self.log.debug(pformat(self.data))

    def desc_txt(self, desc):
        """Return the description as found inside ABI files"""

        desc = desc.strip(" \t\n")

        return desc + "\n\n"

    def xref(self, fname):
        """
        Converts a Documentation/ABI + basename into a ReST cross-reference

        Returns None when there's no reference for ``fname``.
        """

        return self.file_refs.get(fname) or None

    @staticmethod
    def _add_ref(text, target, name, xref):
        """Replace ``target`` inside ``text`` by a ReST :ref: to ``xref``"""

        ref = f":ref:`{name} <{xref}>`"

        # The target comes from the documentation text: escape it, as it
        # may have regex special characters. A callable is used for the
        # replacement, as "name" has backslashes that shall be kept as-is.
        return re.sub(rf"\b{re.escape(target)}\b", lambda _: ref, text)

    def desc_rst(self, desc):
        """
        Enrich ReST output by creating cross-references.

        References to Documentation/*.rst files, to parsed ABI files and
        to known 'what' symbols are converted into ReST references.
        """

        # Remove title markups from the description
        # Having titles inside ABI files will only work if extra
        # care would be taken in order to strictly follow the same
        # level order for each markup.
        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
        desc = desc.rstrip(" \t\n").lstrip("\n")

        # Python's regex performance for non-compiled expressions is a lot
        # worse than Perl, as Perl automatically caches them at their
        # first usage. Here, we need to use pre-compiled expressions, as
        # otherwise the performance penalty would be high

        lines = []
        for d in desc.split("\n"):
            if d == "":
                lines.append("")
                continue

            # Use cross-references for doc files where needed
            d = self.re_doc.sub(r":doc:`/\1`", d)

            # Use cross-references for ABI generated docs where needed
            for m in self.re_abi.findall(d):
                abi = m[0] + m[1]

                xref = self.file_refs.get(abi)
                if not xref:
                    # This may happen if ABI is on a separate directory,
                    # like parsing ABI testing and symbol is at stable.
                    # The proper solution is to move this part of the code
                    # for it to be inside sphinx/kernel_abi.py
                    self.log.info("Didn't find ABI reference for '%s'", abi)
                else:
                    new = self.re_escape.sub(r"\\\1", m[1])
                    d = self._add_ref(d, abi, new, xref)

            # Seek for cross reference symbols like /sys/...
            # Need to be careful to avoid doing it on a code block
            if d[0] not in [" ", "\t"]:
                for m in self.re_xref_node.findall(d):
                    # Finding ABI here is more complex due to wildcards
                    xref = self.what_refs.get(m)
                    if xref:
                        new = self.re_escape.sub(r"\\\1", m)
                        d = self._add_ref(d, m, new, xref)

            lines.append(d)

        return "".join(d + "\n" for d in lines) + "\n\n"

    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
            filter_path=None):
        """
        Generate the ABI documentation.

        Yields one ``(text, file name, line number)`` tuple per entry,
        sorted by entry type and 'what'. ``show_symbols`` and
        ``show_file`` select which types of entries are produced;
        ``filter_path``, when set, keeps only the entries whose "path"
        matches it. Descriptions are produced by desc_txt() when
        ``output_in_txt`` is set and by desc_rst() otherwise.
        """

        part = None
        for key, v in sorted(self.data.items(),
                             key=lambda x: (x[1].get("type", ""),
                                            x[1].get("what"))):

            wtype = v.get("type", "Symbol")
            file_ref = v.get("file")
            names = v.get("what", [""])

            if wtype == "File":
                if not show_file:
                    continue
            elif not show_symbols:
                continue

            if filter_path and v.get("path") != filter_path:
                continue

            msg = ""

            if wtype != "File":
                cur_part = names[0]
                if cur_part.find("/") >= 0:
                    match = self.re_what.match(cur_part)
                    if match:
                        symbol = match.group(1).rstrip("/")
                        cur_part = "Symbols under " + symbol

                if cur_part and cur_part != part:
                    part = cur_part
                    msg += part + "\n" + "-" * len(part) + "\n\n"

                msg += f".. _{key}:\n\n"

                # Build a new list with the names to be printed: the
                # original one is stored at self.data and shall not be
                # modified, as it is used again on further calls
                names = ["**" + self.re_escape.sub(r"\\\1", name) + "**"
                         for name in names]

                max_len = max((len(name) for name in names), default=0)

                msg += "+-" + "-" * max_len + "-+\n"
                for name in names:
                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
                    msg += "+-" + "-" * max_len + "-+\n"
                msg += "\n"

            for ref in file_ref:
                if wtype == "File":
                    msg += f".. _{ref[1]}:\n\n"
                else:
                    base = os.path.basename(ref[0])
                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"

            if wtype == "File":
                msg += names[0] + "\n" + "-" * len(names[0]) + "\n\n"

            desc = v.get("description")
            if not desc and wtype != "File":
                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"

            if desc:
                if output_in_txt:
                    msg += self.desc_txt(desc)
                else:
                    msg += self.desc_rst(desc)

            symbols = v.get("symbols")
            if symbols:
                msg += "Has the following ABI:\n\n"

                for w, label in symbols:
                    # Escape special chars from content
                    content = self.re_escape.sub(r"\\\1", w)

                    msg += f"- :ref:`{content} <{label}>`\n\n"

            users = v.get("users")
            if users and users.strip(" \t\n"):
                users = users.strip("\n").replace('\n', '\n\t')
                msg += f"Users:\n\t{users}\n\n"

            ln = v.get("line_no", 1)

            yield (msg, file_ref[0][0], ln)

    def check_issues(self):
        """Warn about ABI symbols that are defined on more than one file"""

        for what, v in self.what_symbols.items():
            files = v.get("file")
            if not files:
                # Should never happen if the parser works properly
                self.log.warning("%s doesn't have a file associated", what)
                continue

            if len(files) == 1:
                continue

            f = []
            for fname, lines in sorted(files.items()):
                if not lines:
                    f.append(f"{fname}")
                elif len(lines) == 1:
                    f.append(f"{fname}:{lines[0]}")
                else:
                    f.append(f"{fname} lines " + ", ".join(str(x) for x in lines))

            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))

    def search_symbols(self, expr):
        """
        Searches for ABI symbols.

        Prints every symbol whose 'what' matches the ``expr`` regular
        expression (case-insensitive), ordered by the key of its entry.
        """

        regex = re.compile(expr, re.I)

        found_keys = 0
        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            wtype = v.get("type", "")
            if wtype == "File":
                continue

            for what in v.get("what", [""]):
                if not regex.search(what):
                    continue

                found_keys += 1

                kernelversion = v.get("kernelversion", "").strip(" \t\n")
                date = v.get("date", "").strip(" \t\n")
                contact = v.get("contact", "").strip(" \t\n")
                users = v.get("users", "").strip(" \t\n")
                desc = v.get("description", "").strip(" \t\n")

                files = [f[0] for f in v.get("file", ())]

                title = str(found_keys) + ". " + what
                title_tag = "-" * len(title)

                print(f"\n{title}\n{title_tag}\n")

                if kernelversion:
                    print(f"Kernel version:\t\t{kernelversion}")

                if date:
                    print(f"Date:\t\t\t{date}")

                if contact:
                    print(f"Contact:\t\t{contact}")

                if users:
                    print(f"Users:\t\t\t{users}")

                print("Defined on file(s):\t" + ", ".join(files))

                if desc:
                    desc = desc.strip("\n")
                    print(f"\n{desc}\n")

        if not found_keys:
            print(f"Regular expression /{expr}/ not found.")