diff --git a/.komment/00000.json b/.komment/00000.json new file mode 100644 index 000000000000..af1248fffe59 --- /dev/null +++ b/.komment/00000.json @@ -0,0 +1,506 @@ +[ + { + "name": "build_for_portal.py", + "path": "build_for_portal.py", + "content": { + "structured": { + "description": "A set of tools for building and managing documentation in an Asciidoc format using Gitlab. It fetches source files from a remote repository, syncs them with local copies, builds master files, re-formats data for Drupal, and pushes changes back to the remote repository. The code uses various high-level packages such as configparser, logging, and subprocess, and employs techniques like command-line parsing, directory synchronization, and Git commands.", + "items": [ + { + "id": "fd276563-6416-f497-e247-c41a1bc72368", + "ancestors": [], + "description": "Initializes an argument parser using argparse and defines various command-line arguments, including options for specifying distribution, version, and branch information, as well as options for controlling the build process.", + "params": [], + "returns": { + "type_name": "argparseArgumentParser", + "description": "Used to parse command line arguments." 
+ }, + "usage": { + "language": "python", + "code": "parser = setup_parser()\nargs = parser.parse_args([\"--distro\", \"openshift-enterprise\"])\n", + "description": "" + }, + "name": "setup_parser", + "location": { + "start": 126, + "insert": 127, + "offset": " ", + "indent": 4, + "comment": null + }, + "item_type": "function", + "length": 40, + "docLength": null + }, + { + "id": "ff1f7987-97ce-0e97-4f49-ef292e1fe5df", + "ancestors": [ + "4071d37d-c1c9-5482-9040-4aaee9de523b" + ], + "description": "Processes a directory node, appending its name to the `master_entries` list if a specified condition is met (either the `include_name` variable is True or the directory depth exceeds zero).", + "params": [ + { + "name": "dir_node", + "type_name": "Dict[str, object]", + "description": "Expected to contain information about a directory node in a file system hierarchy, including the name of the directory." + }, + { + "name": "parent_dir", + "type_name": "str | None", + "description": "Passed from the parent function. It is not used within the function and can be inferred to represent the path or name of the directory that contains the current dir_node being processed." + }, + { + "name": "depth", + "type_name": "int", + "description": "0-indexed, indicating the current directory depth during recursive traversal. Its value increases by 1 for each nested directory level. This information is used to format the output string with indentation." 
+ } + ], + "returns": null, + "usage": { + "language": "python", + "code": "dir_node = {\"Name\": \"Test Directory\"}\nmaster_entries = []\ndir_callback(dir_node, None, 0)", + "description": "" + }, + "name": "dir_callback", + "location": { + "start": 387, + "insert": 388, + "offset": " ", + "indent": 8, + "comment": null + }, + "item_type": "function", + "length": 5, + "docLength": null + }, + { + "id": "4e4bdfba-2ad0-dd8d-994f-22b9441f3e54", + "ancestors": [ + "4071d37d-c1c9-5482-9040-4aaee9de523b" + ], + "description": "Constructs a file path and an Asciidoctor include statement based on input parameters, then appends these to the `master_entries` list depending on certain conditions regarding the presence of files in a specific directory and the value of the `all_in_one` variable.", + "params": [ + { + "name": "topic_node", + "type_name": "Dict", + "description": "Expected to contain information about a topic, including its name and associated file path." + }, + { + "name": "parent_dir", + "type_name": "str", + "description": "Used to specify the directory path for creating a full file path by joining it with the \"File\" attribute from the `topic_node` dictionary and appending \".adoc\"." + }, + { + "name": "depth", + "type_name": "int", + "description": "Used to specify the level offset for including a file with \"include::\" syntax in the master book file, indicating the depth of nesting of the included topic." 
+ } + ], + "returns": null, + "usage": { + "language": "python", + "code": "topic_callback({\"File\": \"first\"}, \".\", 1)\ntopic_callback({\"File\": \"second\"}, \".\", 2)", + "description": "" + }, + "name": "topic_callback", + "location": { + "start": 393, + "insert": 394, + "offset": " ", + "indent": 8, + "comment": null + }, + "item_type": "function", + "length": 11, + "docLength": null + }, + { + "id": "15ee13bb-9724-428a-ea4a-6a6c1fb51b35", + "ancestors": [ + "e6d2d77c-71a9-848c-7c4c-b6b2ece05850" + ], + "description": "Traverses a directory tree and copies image files from subdirectories to a specified destination directory (`dest_dir`). It checks if a source directory exists, lists its contents, and then recursively copies image files to the destination directory.", + "params": [ + { + "name": "dir_node", + "type_name": "Dict", + "description": "Expected to be a dictionary containing information about a directory node. The dictionary contains at least one key \"Dir\" representing the name of the directory." + }, + { + "name": "parent_dir", + "type_name": "str", + "description": "Used to construct the full path of a directory node. It provides the path up to the current directory being processed, allowing for the construction of the full path of the directory node." + }, + { + "name": "depth", + "type_name": "int", + "description": "Not explicitly used within the function. It seems to be intended for use in recursive directory traversal or recursion depth tracking, but its purpose remains unclear without further context." 
+ } + ], + "returns": null, + "usage": { + "language": "python", + "code": "dir_node = {\"Dir\": \"node1\"}\nparent_dir = \"/path/to/parent/directory\"\ndest_dir = \"/path/to/destination/directory\"\n\ndir_callback(dir_node, parent_dir, 0)", + "description": "" + }, + "name": "dir_callback", + "location": { + "start": 470, + "insert": 471, + "offset": " ", + "indent": 8, + "comment": null + }, + "item_type": "function", + "length": 8, + "docLength": null + }, + { + "id": "1c26d8ab-5cd2-34b0-d748-01da891afb34", + "ancestors": [ + "cdc9271f-2c09-45ac-b144-4c873c31ebfc" + ], + "description": "Creates a destination directory path by joining the base `dest_dir`, the `parent_dir`, and the current `dir_node`'s \"Dir\" value. It then ensures that this directory exists using the `ensure_directory` function.", + "params": [ + { + "name": "dir_node", + "type_name": "Dict", + "description": "Expected to contain key-value pairs representing directory information. Its keys likely include \"Dir\", \"Size\" or other relevant directory details." + }, + { + "name": "parent_dir", + "type_name": "str", + "description": "Used as part of constructing the path for the destination directory by joining it with the `dest_dir` and `dir_node[\"Dir\"]`. It represents the current parent directory." + }, + { + "name": "depth", + "type_name": "int", + "description": "Used to track the level of recursion while traversing the directory hierarchy." 
+ } + ], + "returns": null, + "usage": { + "language": "python", + "code": "dir_node = {\"Dir\": \"node1\"}\nparent_dir = \"\"\ndepth = 0\ndir_callback(dir_node, parent_dir, depth)", + "description": "" + }, + "name": "dir_callback", + "location": { + "start": 488, + "insert": 489, + "offset": " ", + "indent": 8, + "comment": null + }, + "item_type": "function", + "length": 3, + "docLength": null + }, + { + "id": "8ab65e15-a38e-6994-dc43-b5e8963ebbc0", + "ancestors": [ + "cdc9271f-2c09-45ac-b144-4c873c31ebfc" + ], + "description": "Copies an .adoc file from a source directory to a destination directory based on information provided by `topic_node`, which contains the file name and parent directory path. It also handles directories accordingly.", + "params": [ + { + "name": "topic_node", + "type_name": "Dict[str, str | int]", + "description": "Expected to contain information about a topic node, including its file name and possibly other attributes." + }, + { + "name": "parent_dir", + "type_name": "str", + "description": "Used to construct the source directory path (`node_src_dir`) and destination directory path (`node_dest_dir`). It represents the parent directory of the topic node being processed." + }, + { + "name": "depth", + "type_name": "int", + "description": "Used to specify the current directory level being processed while copying files from source directory to destination directory." 
+ } + ], + "returns": null, + "usage": { + "language": "python", + "code": "topic_callback(\"Main\", \"src\", 0)\n", + "description": "" + }, + "name": "topic_callback", + "location": { + "start": 492, + "insert": 493, + "offset": " ", + "indent": 8, + "comment": null + }, + "item_type": "function", + "length": 8, + "docLength": null + }, + { + "id": "8545f556-4a36-7faf-8045-db03accc16dd", + "ancestors": [], + "description": "Cleans up content from a file by identifying and replacing IDs with corresponding titles, fixing links, and handling duplicates and incorrect links, ultimately returning the scrubbed content as a string.", + "params": [ + { + "name": "content", + "type_name": "str", + "description": "Expected to be the content of a file from which links are to be extracted or modified." + }, + { + "name": "info", + "type_name": "Dict[str, Union[bool, List[str], str | int]]", + "description": "Used to hold information about the source file, including whether it's part of an all-in-one book, mapping of files to IDs, duplicate ID replacements, and incorrect link replacements." + }, + { + "name": "book_src_dir", + "type_name": "str", + "description": "Used as part of the construction of the base source file name by replacing info[\"src_dir\"] with book_src_dir. It represents the directory path to the book's source files." + }, + { + "name": "src_file", + "type_name": "str", + "description": "The source file name. It is used to replace the base path with `/` from the source file name, which is then used to get the corresponding title IDs for scrubbing content." + }, + { + "name": "tag", + "type_name": "str | None", + "description": "Optional by default. It appears to be used for fixing links, as it is passed along with other parameters to the `fix_links` function without being used within the main scrubbing logic." 
+ }, + { + "name": "cwd", + "type_name": "str | None", + "description": "Used to specify the current working directory for the purpose of resolving file paths. If not provided, it defaults to None." + } + ], + "returns": { + "type_name": "str", + "description": "The processed and sanitized content after cleaning, replacing duplicate IDs, fixing incorrect links, and performing other transformations based on input parameters." + }, + "usage": { + "language": "python", + "code": "content = \"\"\"= Title 1 (anchor)\nContent Line 1\n= Title 2\nContent Line 2\"\"\"\nscrubbed_content = scrub_content(content, info, book_src_dir, src_file)", + "description": "" + }, + "name": "scrub_content", + "location": { + "start": 670, + "insert": 671, + "offset": " ", + "indent": 4, + "comment": null + }, + "item_type": "function", + "length": 52, + "docLength": null + }, + { + "id": "b557a622-caaa-47a2-a746-58e922a15ab0", + "ancestors": [], + "description": "Takes a directory path, source file name, and information about book nodes as input. It searches for a book node with a matching directory path and returns its name if found; otherwise, it logs an error message and returns the directory path.", + "params": [ + { + "name": "dir", + "type_name": "str", + "description": "Used to match with the \"Dir\" key in the dictionary \"book_nodes\". It represents the directory name for which the corresponding book name needs to be retrieved." + }, + { + "name": "src_file", + "type_name": "str", + "description": "Used as a placeholder to format the error message when a book is not found for a directory." + }, + { + "name": "info", + "type_name": "Dict", + "description": "Expected to contain a key named \"book_nodes\". The value associated with this key should be a list of dictionaries, where each dictionary represents a book node." 
+ } + ], + "returns": { + "type_name": "str|None", + "description": "1) the name of a book if it's found in the \"book_nodes\" list for a matching directory; or 2) the directory itself as a string if no matching book is found; or None if an error occurs." + }, + "usage": { + "language": "python", + "code": "info = {\"book_nodes\": [{\"Dir\": \"dir1\", \"Name\": \"book1\"}, {\"Dir\": \"dir2\", \"Name\": \"book2\"}]}\nname = dir_to_book_name(\"dir1\", \"src_file\", info)", + "description": "" + }, + "name": "dir_to_book_name", + "location": { + "start": 754, + "insert": 756, + "offset": " ", + "indent": 4, + "comment": null + }, + "item_type": "function", + "length": 12, + "docLength": null + }, + { + "id": "cc2e3e21-232b-369e-964f-2f797bd4c716", + "ancestors": [ + "10c41c6a-de6e-2b9c-8641-586d499b2522" + ], + "description": "Extracts file IDs from an ADOC source file, based on a topic node and parent directory. It appends these IDs to a collection named `book_ids`, allowing for aggregation of file information across multiple topics.", + "params": [ + { + "name": "topic_node", + "type_name": "Dict", + "description": "Expected to contain information about a topic, including the file name as \"File\" key." + }, + { + "name": "parent_dir", + "type_name": "str", + "description": "Used to specify the directory path that contains source files. It serves as a prefix for constructing full paths of files related to a topic node." + }, + { + "name": "depth", + "type_name": "int", + "description": "Used to specify the level of recursion for traversing the directory hierarchy when processing files with \"File\" extension." 
+ } + ], + "returns": null, + "usage": { + "language": "python", + "code": "topic_node = {\"File\": \"topic\"}\nparent_dir = \"/path/to/files\"\nbook_ids = []\ntopic_callback(topic_node, parent_dir, 0)", + "description": "" + }, + "name": "topic_callback", + "location": { + "start": 933, + "insert": 934, + "offset": " ", + "indent": 8, + "comment": null + }, + "item_type": "function", + "length": 4, + "docLength": null + }, + { + "id": "006b29ab-d240-92b9-b749-e92df2f82d59", + "ancestors": [ + "36167b61-edcf-b08e-7543-b6c48bfd69eb" + ], + "description": "Maps a source file to an ID by joining the parent directory with the topic node's file name and extension, then calls another function to build the file ID based on the topic name and existing IDs.", + "params": [ + { + "name": "topic_node", + "type_name": "Dict[str, Any]", + "description": "Expected to contain information about a topic, including its name and file path." + }, + { + "name": "parent_dir", + "type_name": "str", + "description": "Used as the base directory for joining with other directories or files to form full paths, such as the source file path (`src_file`)." + }, + { + "name": "depth", + "type_name": "int", + "description": "Used to track the hierarchical structure of the topics. It helps to identify the position of each topic within its hierarchy." + } + ], + "returns": null, + "usage": { + "language": "python", + "code": "topic_node = {\"File\": \"main\", \"Name\": \"Introduction\"}\nparent_dir = \"path/to/adoc/files\"\nfile_to_id_map = {}\nexisting_ids = set()\ntopic_callback(topic_node, parent_dir, 0)", + "description": "" + }, + "name": "topic_callback", + "location": { + "start": 949, + "insert": 950, + "offset": " ", + "indent": 8, + "comment": null + }, + "item_type": "function", + "length": 5, + "docLength": null + }, + { + "id": "01bcb81b-556d-b2a8-d343-14f1c69dde44", + "ancestors": [], + "description": "Recursively compares two directories using `dircmp`. 
It removes files and subdirectories that are present on the right side but not on the left, and copies files and subdirectories from the left to the right if they exist or don't.", + "name": "_sync_directories_dircmp", + "location": { + "start": 1087, + "insert": 1089, + "offset": " ", + "indent": 4, + "comment": null + }, + "item_type": "function", + "length": 18, + "docLength": null + }, + { + "id": "f8b4152a-74c3-a18c-7c48-babd26539bfe", + "ancestors": [], + "description": "Reads a configuration file and extracts relevant information for a specific repository based on the provided distro and version, returning a dictionary with the extracted data.", + "params": [ + { + "name": "config_file", + "type_name": "str", + "description": "Used as the path to a configuration file that contains information about repositories." + }, + { + "name": "distro", + "type_name": "str", + "description": "Used as part of the section name that it attempts to read from the configuration file along with the version number." + }, + { + "name": "version", + "type_name": "str", + "description": "Used as part of a section name to identify a specific configuration section within the config file." + } + ], + "returns": { + "type_name": "Dict[str,str]", + "description": "A dictionary where keys are strings and values are also strings. This dictionary represents repository URLs for given distro and version." 
+ }, + "usage": { + "language": "python", + "code": "repo_urls = parse_repo_config('path/to/config/file', 'Ubuntu', '20.04')\n", + "description": "" + }, + "name": "parse_repo_config", + "location": { + "start": 1136, + "insert": 1138, + "offset": " ", + "indent": 4, + "comment": null + }, + "item_type": "function", + "length": 15, + "docLength": null + }, + { + "id": "e9bbc467-d274-8da5-0141-3820caf59ad3", + "ancestors": [], + "description": "Builds Drupal files for a specified distribution and version from a build configuration file, fetches upstream sources if necessary, cleans and creates directories, and pushes changes to GitLab repositories if required.", + "params": [], + "returns": null, + "usage": { + "language": "python", + "code": "main(args=[\"--distro\", \"fedora28\", \"--title\", \"Drupal for Fedora 28\", \n \"--author\", \"John Doe\", \"--version\", \"1.0\", \"--product\", \"Drupal\"])\n", + "description": "" + }, + "name": "main", + "location": { + "start": 1154, + "insert": 1155, + "offset": " ", + "indent": 4, + "comment": null + }, + "item_type": "function", + "length": 74, + "docLength": null + } + ] + } + } + } +] \ No newline at end of file diff --git a/.komment/komment.json b/.komment/komment.json new file mode 100644 index 000000000000..987f4cf9e5c3 --- /dev/null +++ b/.komment/komment.json @@ -0,0 +1,16 @@ +{ + "meta": { + "version": "1", + "updated_at": "2024-07-27T00:25:01.666Z", + "created_at": "2024-07-27T00:04:04.693Z", + "pipelines": [ + "5ebdff20-691f-4501-8570-5350e9842bbb", + "ca588bbe-121b-4e01-b7e8-ffab1d100dda" + ] + }, + "lookup": [ + [ + "build_for_portal.py" + ] + ] +} \ No newline at end of file diff --git a/build_for_portal.py b/build_for_portal.py index a47c2ac107b3..a7cff6d6a9e0 100644 --- a/build_for_portal.py +++ b/build_for_portal.py @@ -124,6 +124,15 @@ def setup_parser(): + """ + Initializes an argument parser using argparse and defines various command-line + arguments, including options for specifying distribution, 
version, and branch + information, as well as options for controlling the build process. + + Returns: + argparseArgumentParser: Used to parse command line arguments. + + """ parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) @@ -385,12 +394,48 @@ def generate_master_entry( master_entries = [] def dir_callback(dir_node, parent_dir, depth): + """ + Processes a directory node, appending its name to the `master_entries` + list if a specified condition is met (either the `include_name` variable + is True or the directory depth exceeds zero). + + Args: + dir_node (Dict[str, object]): Expected to contain information about a + directory node in a file system hierarchy, including the name of + the directory. + parent_dir (str | None): Passed from the parent function. It is not + used within the function and can be inferred to represent the path + or name of the directory that contains the current dir_node being + processed. + depth (int): 0-indexed, indicating the current directory depth during + recursive traversal. Its value increases by 1 for each nested + directory level. This information is used to format the output + string with indentation. + + """ if include_name or depth > 0: master_entries.append( "=" * (depth + 1) + " " + dir_node["Name"].replace("\\", "") ) def topic_callback(topic_node, parent_dir, depth): + """ + Constructs a file path and an Asciidoctor include statement based on input + parameters, then appends these to the `master_entries` list depending on + certain conditions regarding the presence of files in a specific directory + and the value of the `all_in_one` variable. + + Args: + topic_node (Dict): Expected to contain information about a topic, + including its name and associated file path. + parent_dir (str): Used to specify the directory path for creating a + full file path by joining it with the "File" attribute from the + `topic_node` dictionary and appending ".adoc". 
+ depth (int): Used to specify the level offset for including a file + with "include::" syntax in the master book file, indicating the + depth of nesting of the included topic. + + """ book_file_path = os.path.join(parent_dir, topic_node["File"] + ".adoc") file_path = os.path.join(book_dir, book_file_path) include = "include::" + book_file_path + "[leveloffset=+" + str(depth) + "]" @@ -468,6 +513,24 @@ def copy_images(node, src_path, dest_dir, distro): """ def dir_callback(dir_node, parent_dir, depth): + """ + Traverses a directory tree and copies image files from subdirectories to + a specified destination directory (`dest_dir`). It checks if a source + directory exists, lists its contents, and then recursively copies image + files to the destination directory. + + Args: + dir_node (Dict): Expected to be a dictionary containing information + about a directory node. The dictionary contains at least one key + "Dir" representing the name of the directory. + parent_dir (str): Used to construct the full path of a directory node. + It provides the path up to the current directory being processed, + allowing for the construction of the full path of the directory node. + depth (int): Not explicitly used within the function. It seems to be + intended for use in recursive directory traversal or recursion + depth tracking, but its purpose remains unclear without further context. + + """ node_dir = os.path.join(parent_dir, dir_node["Dir"]) src = os.path.join(node_dir, "images") @@ -486,10 +549,41 @@ def copy_files(node, book_src_dir, src_dir, dest_dir, info): """ def dir_callback(dir_node, parent_dir, depth): + """ + Creates a destination directory path by joining the base `dest_dir`, the + `parent_dir`, and the current `dir_node`'s "Dir" value. It then ensures + that this directory exists using the `ensure_directory` function. + + Args: + dir_node (Dict): Expected to contain key-value pairs representing + directory information. 
Its keys likely include "Dir", "Size" or + other relevant directory details. + parent_dir (str): Used as part of constructing the path for the + destination directory by joining it with the `dest_dir` and + `dir_node["Dir"]`. It represents the current parent directory. + depth (int): Used to track the level of recursion while traversing the + directory hierarchy. + + """ node_dest_dir = os.path.join(dest_dir, parent_dir, dir_node["Dir"]) ensure_directory(node_dest_dir) def topic_callback(topic_node, parent_dir, depth): + """ + Copies an .adoc file from a source directory to a destination directory + based on information provided by `topic_node`, which contains the file + name and parent directory path. It also handles directories accordingly. + + Args: + topic_node (Dict[str, str | int]): Expected to contain information + about a topic node, including its file name and possibly other attributes. + parent_dir (str): Used to construct the source directory path + (`node_src_dir`) and destination directory path (`node_dest_dir`). + It represents the parent directory of the topic node being processed. + depth (int): Used to specify the current directory level being processed + while copying files from source directory to destination directory. + + """ node_src_dir = os.path.join(src_dir, parent_dir) node_dest_dir = os.path.join(dest_dir, parent_dir) @@ -611,6 +705,11 @@ def copy_file( with open(dest_file, "w") as f: f.write(content) +# 7/26/24: +# Corrected the scrub_file function. +# Local files and URLs are now appropriately addressed individually. +# URL includes of files within the openshift-docs repo now generate an error. +# Files outside the `openshift-docs` repo are downloaded directly with error handling for connection issues. 
def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None): """ @@ -618,58 +717,104 @@ def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None): """ base_src_file = src_file.replace(info["src_dir"] + "/", "") - # added 1/Sep/2020 - # to allow loading files like json and yaml from external sources, this - # procedure loads the file recognizing that it starts with http - # it then checks if it exists or not, and if it exists, returns the raw data - # data that it finds. - if base_src_file.startswith("https://raw.githubusercontent.com/openshift/"): + if base_src_file.startswith("https://raw.githubusercontent.com/"): + # Disallow URL inclusion from openshift-docs + if "openshift-docs" in base_src_file: + log.error( + "Inclusion of files within the openshift-docs repository by URL is not supported: %s", + base_src_file, + ) + list_of_errors.append( + f"Inclusion of files within the openshift-docs repository by URL is not supported: {base_src_file}" + ) + return "" # Skip processing this file + + # Allow includes only from specific organizations + if not any(org in base_src_file for org in ["openshift/", "redhatofficial/"]): + log.error( + "Inclusion of files from unauthorized repositories by URL is not supported: %s", + base_src_file, + ) + list_of_errors.append( + f"Inclusion of files from unauthorized repositories by URL is not supported: {base_src_file}" + ) + return "" # Skip processing this file + try: response = requests.get(base_src_file) - if response: - return response.text + if response.status_code == 200: + # Only apply scrubbing if the external file is AsciiDoc + if base_src_file.endswith(".adoc"): + return scrub_content(response.text, info, book_src_dir, src_file, tag, cwd) + else: + return response.text else: - raise ConnectionError("Malformed URL") - except Exception as exception: - log.error("An include file wasn't found: %s", base_src_file) - list_of_errors.append(f"An include file wasn't found: {base_src_file}") - sys.exit(-1) - - # Get a 
list of predefined custom title ids for the file - title_ids = TITLE_IDS.get(base_src_file, {}) + raise ConnectionError(f"Failed to download file from {base_src_file}") + except Exception as e: + log.error(f"Error fetching external include: {base_src_file} - {e}") + list_of_errors.append(f"Error fetching external include: {base_src_file}") + return "" # Skip processing this file - # Read in the source content + # Local file processing with open(src_file, "r") as f: src_file_content = f.readlines() - # Scrub the content - content = "" + return scrub_content("".join(src_file_content), info, book_src_dir, src_file, tag, cwd) + +def scrub_content(content, info, book_src_dir, src_file, tag=None, cwd=None): + """ + Cleans up content from a file by identifying and replacing IDs with corresponding + titles, fixing links, and handling duplicates and incorrect links, ultimately + returning the scrubbed content as a string. + + Args: + content (str): Expected to be the content of a file from which links are + to be extracted or modified. + info (Dict[str, Union[bool, List[str], str | int]]): Used to hold information + about the source file, including whether it's part of an all-in-one + book, mapping of files to IDs, duplicate ID replacements, and incorrect + link replacements. + book_src_dir (str): Used as part of the construction of the base source + file name by replacing info["src_dir"] with book_src_dir. It represents + the directory path to the book's source files. + src_file (str): The source file name. It is used to replace the base path + with `/` from the source file name, which is then used to get the + corresponding title IDs for scrubbing content. + tag (str | None): Optional by default. It appears to be used for fixing + links, as it is passed along with other parameters to the `fix_links` + function without being used within the main scrubbing logic. + cwd (str | None): Used to specify the current working directory for the + purpose of resolving file paths. 
If not provided, it defaults to None. + + Returns: + str: The processed and sanitized content after cleaning, replacing duplicate + IDs, fixing incorrect links, and performing other transformations based + on input parameters. + + """ + base_src_file = src_file.replace(info["src_dir"] + "/", "") + title_ids = TITLE_IDS.get(base_src_file, {}) header_found = content_found = False current_id = None - for line in src_file_content: - # Ignore any leading blank lines, before any meaningful content is found + scrubbed_content = "" + + for line in content.splitlines(True): if line.strip() == "" and not content_found: continue - # Check if the line should be included in the output if include_line(line): content_found = True - - # Setup the document header content/id if not header_found and line.strip() != "" and line.startswith("="): header_found = True - if ( info["all_in_one"] and base_src_file in ALL_IN_ONE_SCRAP_TITLE and line.startswith("= ") ): continue - # Add a section id if one doesn't exist, so we have something to link to elif current_id is None and src_file in info["file_to_id_map"]: file_id = info["file_to_id_map"][src_file] - content += "[[" + file_id + "]]\n" - # Add a custom title id, if one is needed + scrubbed_content += "[[" + file_id + "]]\n" elif line.startswith("=") and current_id is None: for title in title_ids: title_re = ( @@ -678,32 +823,26 @@ def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None): + "( (anchor|\[).*?)?(\n)?$" ) if re.match(title_re, line): - content += "[[" + title_ids[title] + "]]\n" + scrubbed_content += "[[" + title_ids[title] + "]]\n" - # Set the current id based on the line content if current_id is None and ID_RE.match(line.strip()): current_id = line.strip() elif current_id is not None and line.strip != "": current_id = None - # Add the line to the processed content - content += line + scrubbed_content += line - # Fix up any duplicate ids if base_src_file in DUPLICATE_IDS: for duplicate_id, new_id in 
list(DUPLICATE_IDS[base_src_file].items()): - content = content.replace("[[" + duplicate_id + "]]", "[[" + new_id + "]]") + scrubbed_content = scrubbed_content.replace("[[" + duplicate_id + "]]", "[[" + new_id + "]]") - # Replace incorrect links with correct ones if base_src_file in INCORRECT_LINKS: for incorrect_link, fixed_link in list(INCORRECT_LINKS[base_src_file].items()): - content = content.replace(incorrect_link, fixed_link) + scrubbed_content = scrubbed_content.replace(incorrect_link, fixed_link) - # Fix up the links - content = fix_links(content, info, book_src_dir, src_file, tag=tag, cwd=cwd) - - return content + scrubbed_content = fix_links(scrubbed_content, info, book_src_dir, src_file, tag=tag, cwd=cwd) + return scrubbed_content def include_line(line): """ @@ -738,6 +877,28 @@ def fix_links(content, info, book_src_dir, src_file, tag=None, cwd=None): def dir_to_book_name(dir,src_file,info): # find a book name by the directory + """ + Takes a directory path, source file name, and information about book nodes as + input. It searches for a book node with a matching directory path and returns + its name if found; otherwise, it logs an error message and returns the directory + path. + + Args: + dir (str): Used to match with the "Dir" key in the dictionary "book_nodes". + It represents the directory name for which the corresponding book name + needs to be retrieved. + src_file (str): Used as a placeholder to format the error message when a + book is not found for a directory. + info (Dict): Expected to contain a key named "book_nodes". The value + associated with this key should be a list of dictionaries, where each + dictionary represents a book node. + + Returns: + str|None: 1) the name of a book if it's found in the "book_nodes" list for + a matching directory; or 2) the directory itself as a string if no matching + book is found; or None if an error occurs. 
+ + """ for book in info["book_nodes"]: if book["Dir"] == dir: return(book["Name"]) @@ -889,15 +1050,23 @@ def remove_conditional_content(content, info, tag=None): for comment in COMMENT_CONTENT_RE.finditer(content): content = content.replace(comment.group(0), "") - # Remove content outside of tags +# Remove content outside of tags if tag is not None: + # Handle multiple tags separated by commas or semicolons + tags = [t.strip() for t in re.split(r'[;,]', tag)] + filtered_content = "" + for tag_match in TAG_CONTENT_RE.finditer(content): - tag_text = tag_match.group(0) + tag_text = tag_match.group(2) # Content between the tag markers tag_label = tag_match.group(1) - if tag_label == tag: - # Tag matches, so only use the content in the tag - content = tag_text - + if tag_label in tags: + # Tag matches, so include this tagged content + filtered_content += tag_text + + # Replace the original content with filtered content if any tags matched + if filtered_content: + content = filtered_content + return content @@ -908,6 +1077,21 @@ def collect_existing_ids(node, distro, path): book_ids = [] def topic_callback(topic_node, parent_dir, depth): + """ + Extracts file IDs from an ADOC source file, based on a topic node and + parent directory. It appends these IDs to a collection named `book_ids`, + allowing for aggregation of file information across multiple topics. + + Args: + topic_node (Dict): Expected to contain information about a topic, + including the file name as "File" key. + parent_dir (str): Used to specify the directory path that contains + source files. It serves as a prefix for constructing full paths + of files related to a topic node. + depth (int): The current depth of the recursive traversal, as supplied by + the caller.
+ + """ src_file = os.path.join(parent_dir, topic_node["File"] + ".adoc") file_ids = extract_file_ids(src_file) book_ids.extend(file_ids) @@ -924,6 +1108,21 @@ def build_file_to_id_map(node, distro, existing_ids, path=""): file_to_id_map = {} def topic_callback(topic_node, parent_dir, depth): + """ + Maps a source file to an ID by joining the parent directory with the topic + node's file name and extension, then calls another function to build the + file ID based on the topic name and existing IDs. + + Args: + topic_node (Dict[str, Any]): Expected to contain information about a + topic, including its name and file path. + parent_dir (str): Used as the base directory for joining with other + directories or files to form full paths, such as the source file + path (`src_file`). + depth (int): Used to track the hierarchical structure of the topics. + It helps to identify the position of each topic within its hierarchy. + + """ src_file = os.path.join(parent_dir, topic_node["File"] + ".adoc") file_to_id_map[src_file] = build_file_id( topic_node["Name"], file_to_id_map, existing_ids @@ -1063,6 +1262,12 @@ def sync_directories(src_dir, dest_dir, ignore=None): def _sync_directories_dircmp(dcmp): # Remove files that only exist in the dest directory + """ + Recursively compares two directories using `dircmp`. It removes files and + subdirectories that are present on the right side but not on the left, and + copies files and subdirectories from the left to the right, whether or not they already exist on the right. + + """ for filename in dcmp.right_only: right = os.path.join(dcmp.right, filename) if os.path.isfile(right): @@ -1112,6 +1317,25 @@ def commit_and_push_changes(git_dir, git_branch, git_upstream_branch): def parse_repo_config(config_file, distro, version): # Make sure the repo config file exists + """ + Reads a configuration file and extracts relevant information for a specific + repository based on the provided distro and version, returning a dictionary + with the extracted data.
+ + Args: + config_file (str): Used as the path to a configuration file that contains + information about repositories. + distro (str): Used as part of the section name that it attempts to read + from the configuration file along with the version number. + version (str): Used as part of a section name to identify a specific + configuration section within the config file. + + Returns: + Dict[str,str]: A dictionary where keys are strings and values are also + strings. This dictionary represents repository URLs for given distro and + version. + + """ if not os.path.isfile(config_file): log.error("Failed loading the repo configuration from %s", config_file) sys.exit(-1) @@ -1129,6 +1353,12 @@ def parse_repo_config(config_file, distro, version): def main(): + """ + Builds Drupal files for a specified distribution and version from a build + configuration file, fetches upstream sources if necessary, cleans and creates + directories, and pushes changes to GitLab repositories if required. + + """ parser = setup_parser() args = parser.parse_args() logging.basicConfig(format="%(message)s", level=logging.INFO, stream=sys.stdout)