scripts/lib/abi/abi_parser.py - linux - Git at Google

 #!/usr/bin/env python3
 # pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
 # Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
 # SPDX-License-Identifier: GPL-2.0

 """
 Parse ABI documentation and produce results from it.
 """

 from argparse import Namespace
 import logging
 import os
 import re

 from pprint import pformat
 from random import randrange, seed

 # Import Python modules

 from helpers import AbiDebug, ABI_DIR


 class AbiParser:
     """Main class to parse ABI files"""

     TAGS = r"(what|where|date|kernelversion|contact|description|users)"
     XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"

     def __init__(self, directory, logger=None,
                  enable_lineno=False, show_warnings=True, debug=0):
         """Stores arguments for the class and initialize class vars"""

         self.directory = directory
         self.enable_lineno = enable_lineno
         self.show_warnings = show_warnings
         self.debug = debug

         if not logger:
             self.log = logging.getLogger("get_abi")
         else:
             self.log = logger

         self.data = {}
         self.what_symbols = {}
         self.file_refs = {}
         self.what_refs = {}

         # Ignore files that contain such suffixes
         self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

         # Regular expressions used on parser
         self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
         self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
         self.re_valid = re.compile(self.TAGS)
         self.re_start_spc = re.compile(r"(\s*)(\S.*)")
         self.re_whitespace = re.compile(r"^\s+")

         # Regular used on print
         self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
         self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
         self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
         self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
         self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
         self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
         self.re_xref_node = re.compile(self.XREF)

     def warn(self, fdata, msg, extra=None):
         """Displays a parse error if warning is enabled"""

         if not self.show_warnings:
             return

         msg = f"{fdata.fname}:{fdata.ln}: {msg}"
         if extra:
             msg += "\n\t\t" + extra

         self.log.warning(msg)

     def add_symbol(self, what, fname, ln=None, xref=None):
         """Create a reference table describing where each 'what' is located"""

         if what not in self.what_symbols:
             self.what_symbols[what] = {"file": {}}

         if fname not in self.what_symbols[what]["file"]:
             self.what_symbols[what]["file"][fname] = []

         if ln and ln not in self.what_symbols[what]["file"][fname]:
             self.what_symbols[what]["file"][fname].append(ln)

         if xref:
             self.what_symbols[what]["xref"] = xref

     def _parse_line(self, fdata, line):
         """Parse a single line of an ABI file"""

         new_what = False
         new_tag = False
         content = None

         match = self.re_tag.match(line)
         if match:
             new = match.group(1).lower()
             sep = match.group(2)
             content = match.group(3)

             match = self.re_valid.search(new)
             if match:
                 new_tag = match.group(1)
             else:
                 if fdata.tag == "description":
                     # New "tag" is actually part of description.
                     # Don't consider it a tag
                     new_tag = False
                 elif fdata.tag != "":
                     self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)

         if new_tag:
             # "where" is Invalid, but was a common mistake. Warn if found
             if new_tag == "where":
                 self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                 new_tag = "what"

             if new_tag == "what":
                 fdata.space = None

                 if content not in self.what_symbols:
                     self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                 if fdata.tag == "what":
                     fdata.what.append(content.strip("\n"))
                 else:
                     if fdata.key:
                         if "description" not in self.data.get(fdata.key, {}):
                             self.warn(fdata, f"{fdata.key} doesn't have a description")

                         for w in fdata.what:
                             self.add_symbol(what=w, fname=fdata.fname,
                                             ln=fdata.what_ln, xref=fdata.key)

                     fdata.label = content
                     new_what = True

                     key = "abi_" + content.lower()
                     fdata.key = self.re_unprintable.sub("_", key).strip("_")

                     # Avoid duplicated keys but using a defined seed, to make
                     # the namespace identical if there aren't changes at the
                     # ABI symbols
                     seed(42)

                     while fdata.key in self.data:
                         char = randrange(0, 51) + ord("A")
                         if char > ord("Z"):
                             char += ord("a") - ord("Z") - 1

                         fdata.key += chr(char)

                     if fdata.key and fdata.key not in self.data:
                         self.data[fdata.key] = {
                             "what": [content],
                             "file": [fdata.file_ref],
                             "path": fdata.ftype,
                             "line_no": fdata.ln,
                         }

                     fdata.what = self.data[fdata.key]["what"]

                 self.what_refs[content] = fdata.key
                 fdata.tag = new_tag
                 fdata.what_ln = fdata.ln

                 if fdata.nametag["what"]:
                     t = (content, fdata.key)
                     if t not in fdata.nametag["symbols"]:
                         fdata.nametag["symbols"].append(t)

                 return

             if fdata.tag and new_tag:
                 fdata.tag = new_tag

                 if new_what:
                     fdata.label = ""

                     if "description" in self.data[fdata.key]:
                         self.data[fdata.key]["description"] += "\n\n"

                     if fdata.file_ref not in self.data[fdata.key]["file"]:
                         self.data[fdata.key]["file"].append(fdata.file_ref)

                     if self.debug == AbiDebug.WHAT_PARSING:
                         self.log.debug("what: %s", fdata.what)

                 if not fdata.what:
                     self.warn(fdata, "'What:' should come first:", line)
                     return

                 if new_tag == "description":
                     fdata.space = None

                     if content:
                         sep = sep.replace(":", " ")

                         c = " " * len(new_tag) + sep + content
                         c = c.expandtabs()

                         match = self.re_start_spc.match(c)
                         if match:
                             # Preserve initial spaces for the first line
                             fdata.space = match.group(1)
                             content = match.group(2) + "\n"

                 self.data[fdata.key][fdata.tag] = content

             return

         # Store any contents before tags at the database
         if not fdata.tag and "what" in fdata.nametag:
             fdata.nametag["description"] += line
             return

         if fdata.tag == "description":
             content = line.expandtabs()

             if self.re_whitespace.sub("", content) == "":
                 self.data[fdata.key][fdata.tag] += "\n"
                 return

             if fdata.space is None:
                 match = self.re_start_spc.match(content)
                 if match:
                     # Preserve initial spaces for the first line
                     fdata.space = match.group(1)

                     content = match.group(2) + "\n"
             else:
                 if content.startswith(fdata.space):
                     content = content[len(fdata.space):]

                 else:
                     fdata.space = ""

             if fdata.tag == "what":
                 w = content.strip("\n")
                 if w:
                     self.data[fdata.key][fdata.tag].append(w)
             else:
                 self.data[fdata.key][fdata.tag] += content
             return

         content = line.strip()
         if fdata.tag:
             if fdata.tag == "what":
                 w = content.strip("\n")
                 if w:
                     self.data[fdata.key][fdata.tag].append(w)
             else:
                 self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
             return

         # Everything else is error
         if content:
             self.warn(fdata, "Unexpected content", line)

     def parse_readme(self, nametag, fname):
         """Parse ABI README file"""

         nametag["what"] = ["Introduction"]
         nametag["path"] = "README"
         with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
             for line in fp:
                 match = self.re_tag.match(line)
                 if match:
                     new = match.group(1).lower()

                     match = self.re_valid.search(new)
                     if match:
                         nametag["description"] += "\n:" + line
                         continue

                 nametag["description"] += line

     def parse_file(self, fname, path, basename):
         """Parse a single file"""

         ref = f"abi_file_{path}_{basename}"
         ref = self.re_unprintable.sub("_", ref).strip("_")

         # Store per-file state into a namespace variable. This will be used
         # by the per-line parser state machine and by the warning function.
         fdata = Namespace

         fdata.fname = fname
         fdata.name = basename

         pos = fname.find(ABI_DIR)
         if pos > 0:
             f = fname[pos:]
         else:
             f = fname

         fdata.file_ref = (f, ref)
         self.file_refs[f] = ref

         fdata.ln = 0
         fdata.what_ln = 0
         fdata.tag = ""
         fdata.label = ""
         fdata.what = []
         fdata.key = None
         fdata.xrefs = None
         fdata.space = None
         fdata.ftype = path.split("/")[0]

         fdata.nametag = {}
         fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
         fdata.nametag["type"] = "File"
         fdata.nametag["path"] = fdata.ftype
         fdata.nametag["file"] = [fdata.file_ref]
         fdata.nametag["line_no"] = 1
         fdata.nametag["description"] = ""
         fdata.nametag["symbols"] = []

         self.data[ref] = fdata.nametag

         if self.debug & AbiDebug.WHAT_OPEN:
             self.log.debug("Opening file %s", fname)

         if basename == "README":
             self.parse_readme(fdata.nametag, fname)
             return

         with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
             for line in fp:
                 fdata.ln += 1

                 self._parse_line(fdata, line)

             if "description" in fdata.nametag:
                 fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

             if fdata.key:
                 if "description" not in self.data.get(fdata.key, {}):
                     self.warn(fdata, f"{fdata.key} doesn't have a description")

                 for w in fdata.what:
                     self.add_symbol(what=w, fname=fname, xref=fdata.key)

     def _parse_abi(self, root=None):
         """Internal function to parse documentation ABI recursively"""

         if not root:
             root = self.directory

         with os.scandir(root) as obj:
             for entry in obj:
                 name = os.path.join(root, entry.name)

                 if entry.is_dir():
                     self._parse_abi(name)
                     continue

                 if not entry.is_file():
                     continue

                 basename = os.path.basename(name)

                 if basename.startswith("."):
                     continue

                 if basename.endswith(self.ignore_suffixes):
                     continue

                 path = self.re_abi_dir.sub("", os.path.dirname(name))

                 self.parse_file(name, path, basename)

     def parse_abi(self, root=None):
         """Parse documentation ABI"""

         self._parse_abi(root)

         if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
             self.log.debug(pformat(self.data))

     def desc_txt(self, desc):
         """Print description as found inside ABI files"""

         desc = desc.strip(" \t\n")

         return desc + "\n\n"

     def xref(self, fname):
         """
         Converts a Documentation/ABI + basename into a ReST cross-reference
         """

         xref = self.file_refs.get(fname)
         if not xref:
             return None
         else:
             return xref

     def desc_rst(self, desc):
         """Enrich ReST output by creating cross-references"""

         # Remove title markups from the description
         # Having titles inside ABI files will only work if extra
         # care would be taken in order to strictly follow the same
         # level order for each markup.
         desc = self.re_title_mark.sub("\n\n", "\n" + desc)
         desc = desc.rstrip(" \t\n").lstrip("\n")

         # Python's regex performance for non-compiled expressions is a lot
         # than Perl, as Perl automatically caches them at their
         # first usage. Here, we'll need to do the same, as otherwise the
         # performance penalty is be high

         new_desc = ""
         for d in desc.split("\n"):
             if d == "":
                 new_desc += "\n"
                 continue

             # Use cross-references for doc files where needed
             d = self.re_doc.sub(r":doc:`/\1`", d)

             # Use cross-references for ABI generated docs where needed
             matches = self.re_abi.findall(d)
             for m in matches:
                 abi = m[0] + m[1]

                 xref = self.file_refs.get(abi)
                 if not xref:
                     # This may happen if ABI is on a separate directory,
                     # like parsing ABI testing and symbol is at stable.
                     # The proper solution is to move this part of the code
                     # for it to be inside sphinx/kernel_abi.py
                     self.log.info("Didn't find ABI reference for '%s'", abi)
                 else:
                     new = self.re_escape.sub(r"\\\1", m[1])
                     d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)

             # Seek for cross reference symbols like /sys/...
             # Need to be careful to avoid doing it on a code block
             if d[0] not in [" ", "\t"]:
                 matches = self.re_xref_node.findall(d)
                 for m in matches:
                     # Finding ABI here is more complex due to wildcards
                     xref = self.what_refs.get(m)
                     if xref:
                         new = self.re_escape.sub(r"\\\1", m)
                         d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)

             new_desc += d + "\n"

         return new_desc + "\n\n"

     def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
             filter_path=None):
         """Print ABI at stdout"""

         part = None
         for key, v in sorted(self.data.items(),
                              key=lambda x: (x[1].get("type", ""),
                                             x[1].get("what"))):

             wtype = v.get("type", "Symbol")
             file_ref = v.get("file")
             names = v.get("what", [""])

             if wtype == "File":
                 if not show_file:
                     continue
             else:
                 if not show_symbols:
                     continue

             if filter_path:
                 if v.get("path") != filter_path:
                     continue

             msg = ""

             if wtype != "File":
                 cur_part = names[0]
                 if cur_part.find("/") >= 0:
                     match = self.re_what.match(cur_part)
                     if match:
                         symbol = match.group(1).rstrip("/")
                         cur_part = "Symbols under " + symbol

                 if cur_part and cur_part != part:
                     part = cur_part
                     msg += part + "\n"+ "-" * len(part) +"\n\n"

                 msg += f".. _{key}:\n\n"

                 max_len = 0
                 for i in range(0, len(names)):           # pylint: disable=C0200
                     names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"

                     max_len = max(max_len, len(names[i]))

                 msg += "+-" + "-" * max_len + "-+\n"
                 for name in names:
                     msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
                     msg += "+-" + "-" * max_len + "-+\n"
                 msg += "\n"

             for ref in file_ref:
                 if wtype == "File":
                     msg += f".. _{ref[1]}:\n\n"
                 else:
                     base = os.path.basename(ref[0])
                     msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"

             if wtype == "File":
                 msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"

             desc = v.get("description")
             if not desc and wtype != "File":
                 msg += f"DESCRIPTION MISSING for {names[0]}\n\n"

             if desc:
                 if output_in_txt:
                     msg += self.desc_txt(desc)
                 else:
                     msg += self.desc_rst(desc)

             symbols = v.get("symbols")
             if symbols:
                 msg += "Has the following ABI:\n\n"

                 for w, label in symbols:
                     # Escape special chars from content
                     content = self.re_escape.sub(r"\\\1", w)

                     msg += f"- :ref:`{content} <{label}>`\n\n"

             users = v.get("users")
             if users and users.strip(" \t\n"):
                 users = users.strip("\n").replace('\n', '\n\t')
                 msg += f"Users:\n\t{users}\n\n"

             ln = v.get("line_no", 1)

             yield (msg, file_ref[0][0], ln)

     def check_issues(self):
         """Warn about duplicated ABI entries"""

         for what, v in self.what_symbols.items():
             files = v.get("file")
             if not files:
                 # Should never happen if the parser works properly
                 self.log.warning("%s doesn't have a file associated", what)
                 continue

             if len(files) == 1:
                 continue

             f = []
             for fname, lines in sorted(files.items()):
                 if not lines:
                     f.append(f"{fname}")
                 elif len(lines) == 1:
                     f.append(f"{fname}:{lines[0]}")
                 else:
                     m = fname + "lines "
                     m += ", ".join(str(x) for x in lines)
                     f.append(m)

             self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))

     def search_symbols(self, expr):
         """ Searches for ABI symbols """

         regex = re.compile(expr, re.I)

         found_keys = 0
         for t in sorted(self.data.items(), key=lambda x: [0]):
             v = t[1]

             wtype = v.get("type", "")
             if wtype == "File":
                 continue

             for what in v.get("what", [""]):
                 if regex.search(what):
                     found_keys += 1

                     kernelversion = v.get("kernelversion", "").strip(" \t\n")
                     date = v.get("date", "").strip(" \t\n")
                     contact = v.get("contact", "").strip(" \t\n")
                     users = v.get("users", "").strip(" \t\n")
                     desc = v.get("description", "").strip(" \t\n")

                     files = []
                     for f in v.get("file", ()):
                         files.append(f[0])

                     what = str(found_keys) + ". " + what
                     title_tag = "-" * len(what)

                     print(f"\n{what}\n{title_tag}\n")

                     if kernelversion:
                         print(f"Kernel version:\t\t{kernelversion}")

                     if date:
                         print(f"Date:\t\t\t{date}")

                     if contact:
                         print(f"Contact:\t\t{contact}")

                     if users:
                         print(f"Users:\t\t\t{users}")

                     print("Defined on file(s):\t" + ", ".join(files))

                     if desc:
                         desc = desc.strip("\n")
                         print(f"\n{desc}\n")

         if not found_keys:
             print(f"Regular expression /{expr}/ not found.")
	#!/usr/bin/env python3
	# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
	# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
	# SPDX-License-Identifier: GPL-2.0

	"""
	Parse ABI documentation and produce results from it.
	"""

	from argparse import Namespace
	import logging
	import os
	import re

	from pprint import pformat
	from random import randrange, seed

	# Import Python modules

	from helpers import AbiDebug, ABI_DIR


	class AbiParser:
	"""Main class to parse ABI files"""

	TAGS = r"(what\|where\|date\|kernelversion\|contact\|description\|users)"
	XREF = r"(?:^\|\s\|\()(\/(?:sys\|config\|proc\|dev\|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]\|\Z)"

	def __init__(self, directory, logger=None,
	enable_lineno=False, show_warnings=True, debug=0):
	"""Stores arguments for the class and initialize class vars"""

	self.directory = directory
	self.enable_lineno = enable_lineno
	self.show_warnings = show_warnings
	self.debug = debug

	if not logger:
	self.log = logging.getLogger("get_abi")
	else:
	self.log = logger

	self.data = {}
	self.what_symbols = {}
	self.file_refs = {}
	self.what_refs = {}

	# Ignore files that contain such suffixes
	self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

	# Regular expressions used on parser
	self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
	self.re_tag = re.compile(r"(\S+)(:\s)(.)", re.I)
	self.re_valid = re.compile(self.TAGS)
	self.re_start_spc = re.compile(r"(\s)(\S.)")
	self.re_whitespace = re.compile(r"^\s+")

	# Regular used on print
	self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
	self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
	self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
	self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
	self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
	self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
	self.re_xref_node = re.compile(self.XREF)

	def warn(self, fdata, msg, extra=None):
	"""Displays a parse error if warning is enabled"""

	if not self.show_warnings:
	return

	msg = f"{fdata.fname}:{fdata.ln}: {msg}"
	if extra:
	msg += "\n\t\t" + extra

	self.log.warning(msg)

	def add_symbol(self, what, fname, ln=None, xref=None):
	"""Create a reference table describing where each 'what' is located"""

	if what not in self.what_symbols:
	self.what_symbols[what] = {"file": {}}

	if fname not in self.what_symbols[what]["file"]:
	self.what_symbols[what]["file"][fname] = []

	if ln and ln not in self.what_symbols[what]["file"][fname]:
	self.what_symbols[what]["file"][fname].append(ln)

	if xref:
	self.what_symbols[what]["xref"] = xref

	def _parse_line(self, fdata, line):
	"""Parse a single line of an ABI file"""

	new_what = False
	new_tag = False
	content = None

	match = self.re_tag.match(line)
	if match:
	new = match.group(1).lower()
	sep = match.group(2)
	content = match.group(3)

	match = self.re_valid.search(new)
	if match:
	new_tag = match.group(1)
	else:
	if fdata.tag == "description":
	# New "tag" is actually part of description.
	# Don't consider it a tag
	new_tag = False
	elif fdata.tag != "":
	self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)

	if new_tag:
	# "where" is Invalid, but was a common mistake. Warn if found
	if new_tag == "where":
	self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
	new_tag = "what"

	if new_tag == "what":
	fdata.space = None

	if content not in self.what_symbols:
	self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

	if fdata.tag == "what":
	fdata.what.append(content.strip("\n"))
	else:
	if fdata.key:
	if "description" not in self.data.get(fdata.key, {}):
	self.warn(fdata, f"{fdata.key} doesn't have a description")

	for w in fdata.what:
	self.add_symbol(what=w, fname=fdata.fname,
	ln=fdata.what_ln, xref=fdata.key)

	fdata.label = content
	new_what = True

	key = "abi_" + content.lower()
	fdata.key = self.re_unprintable.sub("_", key).strip("_")

	# Avoid duplicated keys but using a defined seed, to make
	# the namespace identical if there aren't changes at the
	# ABI symbols
	seed(42)

	while fdata.key in self.data:
	char = randrange(0, 51) + ord("A")
	if char > ord("Z"):
	char += ord("a") - ord("Z") - 1

	fdata.key += chr(char)

	if fdata.key and fdata.key not in self.data:
	self.data[fdata.key] = {
	"what": [content],
	"file": [fdata.file_ref],
	"path": fdata.ftype,
	"line_no": fdata.ln,
	}

	fdata.what = self.data[fdata.key]["what"]

	self.what_refs[content] = fdata.key
	fdata.tag = new_tag
	fdata.what_ln = fdata.ln

	if fdata.nametag["what"]:
	t = (content, fdata.key)
	if t not in fdata.nametag["symbols"]:
	fdata.nametag["symbols"].append(t)

	return

	if fdata.tag and new_tag:
	fdata.tag = new_tag

	if new_what:
	fdata.label = ""

	if "description" in self.data[fdata.key]:
	self.data[fdata.key]["description"] += "\n\n"

	if fdata.file_ref not in self.data[fdata.key]["file"]:
	self.data[fdata.key]["file"].append(fdata.file_ref)

	if self.debug == AbiDebug.WHAT_PARSING:
	self.log.debug("what: %s", fdata.what)

	if not fdata.what:
	self.warn(fdata, "'What:' should come first:", line)
	return

	if new_tag == "description":
	fdata.space = None

	if content:
	sep = sep.replace(":", " ")

	c = " " * len(new_tag) + sep + content
	c = c.expandtabs()

	match = self.re_start_spc.match(c)
	if match:
	# Preserve initial spaces for the first line
	fdata.space = match.group(1)
	content = match.group(2) + "\n"

	self.data[fdata.key][fdata.tag] = content

	return

	# Store any contents before tags at the database
	if not fdata.tag and "what" in fdata.nametag:
	fdata.nametag["description"] += line
	return

	if fdata.tag == "description":
	content = line.expandtabs()

	if self.re_whitespace.sub("", content) == "":
	self.data[fdata.key][fdata.tag] += "\n"
	return

	if fdata.space is None:
	match = self.re_start_spc.match(content)
	if match:
	# Preserve initial spaces for the first line
	fdata.space = match.group(1)

	content = match.group(2) + "\n"
	else:
	if content.startswith(fdata.space):
	content = content[len(fdata.space):]

	else:
	fdata.space = ""

	if fdata.tag == "what":
	w = content.strip("\n")
	if w:
	self.data[fdata.key][fdata.tag].append(w)
	else:
	self.data[fdata.key][fdata.tag] += content
	return

	content = line.strip()
	if fdata.tag:
	if fdata.tag == "what":
	w = content.strip("\n")
	if w:
	self.data[fdata.key][fdata.tag].append(w)
	else:
	self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
	return

	# Everything else is error
	if content:
	self.warn(fdata, "Unexpected content", line)

	def parse_readme(self, nametag, fname):
	"""Parse ABI README file"""

	nametag["what"] = ["Introduction"]
	nametag["path"] = "README"
	with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
	for line in fp:
	match = self.re_tag.match(line)
	if match:
	new = match.group(1).lower()

	match = self.re_valid.search(new)
	if match:
	nametag["description"] += "\n:" + line
	continue

	nametag["description"] += line

	def parse_file(self, fname, path, basename):
	"""Parse a single file"""

	ref = f"abi_file_{path}_{basename}"
	ref = self.re_unprintable.sub("_", ref).strip("_")

	# Store per-file state into a namespace variable. This will be used
	# by the per-line parser state machine and by the warning function.
	fdata = Namespace

	fdata.fname = fname
	fdata.name = basename

	pos = fname.find(ABI_DIR)
	if pos > 0:
	f = fname[pos:]
	else:
	f = fname

	fdata.file_ref = (f, ref)
	self.file_refs[f] = ref

	fdata.ln = 0
	fdata.what_ln = 0
	fdata.tag = ""
	fdata.label = ""
	fdata.what = []
	fdata.key = None
	fdata.xrefs = None
	fdata.space = None
	fdata.ftype = path.split("/")[0]

	fdata.nametag = {}
	fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
	fdata.nametag["type"] = "File"
	fdata.nametag["path"] = fdata.ftype
	fdata.nametag["file"] = [fdata.file_ref]
	fdata.nametag["line_no"] = 1
	fdata.nametag["description"] = ""
	fdata.nametag["symbols"] = []

	self.data[ref] = fdata.nametag

	if self.debug & AbiDebug.WHAT_OPEN:
	self.log.debug("Opening file %s", fname)

	if basename == "README":
	self.parse_readme(fdata.nametag, fname)
	return

	with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
	for line in fp:
	fdata.ln += 1

	self._parse_line(fdata, line)

	if "description" in fdata.nametag:
	fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

	if fdata.key:
	if "description" not in self.data.get(fdata.key, {}):
	self.warn(fdata, f"{fdata.key} doesn't have a description")

	for w in fdata.what:
	self.add_symbol(what=w, fname=fname, xref=fdata.key)

	def _parse_abi(self, root=None):
	"""Internal function to parse documentation ABI recursively"""

	if not root:
	root = self.directory

	with os.scandir(root) as obj:
	for entry in obj:
	name = os.path.join(root, entry.name)

	if entry.is_dir():
	self._parse_abi(name)
	continue

	if not entry.is_file():
	continue

	basename = os.path.basename(name)

	if basename.startswith("."):
	continue

	if basename.endswith(self.ignore_suffixes):
	continue

	path = self.re_abi_dir.sub("", os.path.dirname(name))

	self.parse_file(name, path, basename)

	def parse_abi(self, root=None):
	"""Parse documentation ABI"""

	self._parse_abi(root)

	if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
	self.log.debug(pformat(self.data))

	def desc_txt(self, desc):
	"""Print description as found inside ABI files"""

	desc = desc.strip(" \t\n")

	return desc + "\n\n"

	def xref(self, fname):
	"""
	Converts a Documentation/ABI + basename into a ReST cross-reference
	"""

	xref = self.file_refs.get(fname)
	if not xref:
	return None
	else:
	return xref

	def desc_rst(self, desc):
	"""Enrich ReST output by creating cross-references"""

	# Remove title markups from the description
	# Having titles inside ABI files will only work if extra
	# care would be taken in order to strictly follow the same
	# level order for each markup.
	desc = self.re_title_mark.sub("\n\n", "\n" + desc)
	desc = desc.rstrip(" \t\n").lstrip("\n")

	# Python's regex performance for non-compiled expressions is a lot
	# than Perl, as Perl automatically caches them at their
	# first usage. Here, we'll need to do the same, as otherwise the
	# performance penalty is be high

	new_desc = ""
	for d in desc.split("\n"):
	if d == "":
	new_desc += "\n"
	continue

	# Use cross-references for doc files where needed
	d = self.re_doc.sub(r":doc:`/\1`", d)

	# Use cross-references for ABI generated docs where needed
	matches = self.re_abi.findall(d)
	for m in matches:
	abi = m[0] + m[1]

	xref = self.file_refs.get(abi)
	if not xref:
	# This may happen if ABI is on a separate directory,
	# like parsing ABI testing and symbol is at stable.
	# The proper solution is to move this part of the code
	# for it to be inside sphinx/kernel_abi.py
	self.log.info("Didn't find ABI reference for '%s'", abi)
	else:
	new = self.re_escape.sub(r"\\\1", m[1])
	d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)

	# Seek for cross reference symbols like /sys/...
	# Need to be careful to avoid doing it on a code block
	if d[0] not in [" ", "\t"]:
	matches = self.re_xref_node.findall(d)
	for m in matches:
	# Finding ABI here is more complex due to wildcards
	xref = self.what_refs.get(m)
	if xref:
	new = self.re_escape.sub(r"\\\1", m)
	d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)

	new_desc += d + "\n"

	return new_desc + "\n\n"

	def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
	filter_path=None):
	"""Print ABI at stdout"""

	part = None
	for key, v in sorted(self.data.items(),
	key=lambda x: (x[1].get("type", ""),
	x[1].get("what"))):

	wtype = v.get("type", "Symbol")
	file_ref = v.get("file")
	names = v.get("what", [""])

	if wtype == "File":
	if not show_file:
	continue
	else:
	if not show_symbols:
	continue

	if filter_path:
	if v.get("path") != filter_path:
	continue

	msg = ""

	if wtype != "File":
	cur_part = names[0]
	if cur_part.find("/") >= 0:
	match = self.re_what.match(cur_part)
	if match:
	symbol = match.group(1).rstrip("/")
	cur_part = "Symbols under " + symbol

	if cur_part and cur_part != part:
	part = cur_part
	msg += part + "\n"+ "-" * len(part) +"\n\n"

	msg += f".. _{key}:\n\n"

	max_len = 0
	for i in range(0, len(names)): # pylint: disable=C0200
	names[i] = "" + self.re_escape.sub(r"\\\1", names[i]) + ""

	max_len = max(max_len, len(names[i]))

	msg += "+-" + "-" * max_len + "-+\n"
	for name in names:
	msg += f"\| {name}" + " " * (max_len - len(name)) + " \|\n"
	msg += "+-" + "-" * max_len + "-+\n"
	msg += "\n"

	for ref in file_ref:
	if wtype == "File":
	msg += f".. _{ref[1]}:\n\n"
	else:
	base = os.path.basename(ref[0])
	msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"

	if wtype == "File":
	msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"

	desc = v.get("description")
	if not desc and wtype != "File":
	msg += f"DESCRIPTION MISSING for {names[0]}\n\n"

	if desc:
	if output_in_txt:
	msg += self.desc_txt(desc)
	else:
	msg += self.desc_rst(desc)

	symbols = v.get("symbols")
	if symbols:
	msg += "Has the following ABI:\n\n"

	for w, label in symbols:
	# Escape special chars from content
	content = self.re_escape.sub(r"\\\1", w)

	msg += f"- :ref:`{content} <{label}>`\n\n"

	users = v.get("users")
	if users and users.strip(" \t\n"):
	users = users.strip("\n").replace('\n', '\n\t')
	msg += f"Users:\n\t{users}\n\n"

	ln = v.get("line_no", 1)

	yield (msg, file_ref[0][0], ln)

	def check_issues(self):
	"""Warn about duplicated ABI entries"""

	for what, v in self.what_symbols.items():
	files = v.get("file")
	if not files:
	# Should never happen if the parser works properly
	self.log.warning("%s doesn't have a file associated", what)
	continue

	if len(files) == 1:
	continue

	f = []
	for fname, lines in sorted(files.items()):
	if not lines:
	f.append(f"{fname}")
	elif len(lines) == 1:
	f.append(f"{fname}:{lines[0]}")
	else:
	m = fname + "lines "
	m += ", ".join(str(x) for x in lines)
	f.append(m)

	self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))

	def search_symbols(self, expr):
	""" Searches for ABI symbols """

	regex = re.compile(expr, re.I)

	found_keys = 0
	for t in sorted(self.data.items(), key=lambda x: [0]):
	v = t[1]

	wtype = v.get("type", "")
	if wtype == "File":
	continue

	for what in v.get("what", [""]):
	if regex.search(what):
	found_keys += 1

	kernelversion = v.get("kernelversion", "").strip(" \t\n")
	date = v.get("date", "").strip(" \t\n")
	contact = v.get("contact", "").strip(" \t\n")
	users = v.get("users", "").strip(" \t\n")
	desc = v.get("description", "").strip(" \t\n")

	files = []
	for f in v.get("file", ()):
	files.append(f[0])

	what = str(found_keys) + ". " + what
	title_tag = "-" * len(what)

	print(f"\n{what}\n{title_tag}\n")

	if kernelversion:
	print(f"Kernel version:\t\t{kernelversion}")

	if date:
	print(f"Date:\t\t\t{date}")

	if contact:
	print(f"Contact:\t\t{contact}")

	if users:
	print(f"Users:\t\t\t{users}")

	print("Defined on file(s):\t" + ", ".join(files))

	if desc:
	desc = desc.strip("\n")
	print(f"\n{desc}\n")

	if not found_keys:
	print(f"Regular expression /{expr}/ not found.")