# markdown_snippet_injector/markdown_snippet_injector.py

import argparse
import re
import xml.etree.ElementTree as ET


def extract_code_snippets(file_path: str) -> dict:
    """
    Extracts code snippets delimited by "BEGIN CODE SNIPPET XYZ" and "END CODE SNIPPET" from the given file, with XYZ being some user-defined name.

    All snippets are extracted and returned as a dictionary with the snippet name as the key, and the snippet contents as the value.
    The contents are the lines strictly between the two marker lines, joined into a single string.

    The snippet name is everything that follows "BEGIN CODE SNIPPET" on the marker line, stripped of surrounding whitespace.
    An "END CODE SNIPPET" marker without an open "BEGIN CODE SNIPPET" before it is ignored, and a snippet that is never closed is not returned.
    If the same name is used several times, the last snippet wins.

    :param file_path: Path of the file to scan for snippets.
    :return: Dict mapping snippet names to snippet contents.
    """
    snippets = dict()
    with open(file_path) as f_in:
        lines = f_in.readlines()

    snippet_name = None
    snippet_start_line = None
    for i, line in enumerate(lines):
        if "BEGIN CODE SNIPPET" in line:
            snippet_name = line.split("BEGIN CODE SNIPPET")[1].strip()
            # The snippet body starts on the line after the marker
            snippet_start_line = i + 1
        if "END CODE SNIPPET" in line and snippet_start_line is not None:
            snippets[snippet_name] = "".join(lines[snippet_start_line:i])
            # Close the snippet so that a stray END marker further down cannot overwrite it
            snippet_name = None
            snippet_start_line = None
    return snippets


def decode_xml_snippet_tag(line):
    """
    Extracts and decodes the XML snippet tag under the following form :
    ```
    <include_snippet name="snippet_name" file="path/to/file.cpp"/>
    ```

    The tag must be the first thing on the line; leading whitespace (indentation) is allowed.
    If several tags are present on the same line, the last one is returned.

    :param line: Line containing the XML tag.
    :return: Dict with the keys "name" and "file" (a value is None when the attribute is missing), or None if the line does not start with an `<include_snippet .../>` tag.
    :raises xml.etree.ElementTree.ParseError: If the matched text is not well-formed XML.
    """
    # Extract the XML tag from the line.
    # `\s*` tolerates indentation, `\b` rejects look-alike tag names such as `<include_snippets`.
    the_match = re.match(r"\s*(<include_snippet\b.+/>)", line)
    if the_match is None:
        return None

    # Parse the XML string, wrapped in a root tag so that several tags on one line remain well-formed
    root = ET.fromstring(f'<root>{the_match.group(1)}</root>')

    elements = root.findall('include_snippet')
    if not elements:
        # The text matched the pattern but holds no actual `include_snippet` element
        return None

    elem = elements[-1]
    return {"name": elem.get('name'), "file": elem.get('file')}


def extract_snippet_infos(lines) -> dict:
    """
    Extracts the names of the snippets required from the source markdown file.

    Returns a dictionary of lists :
    - file_name_1
        - snippet_name_1
        - snippet_name_2
        - ...
    - file_name_2
        - snippet_name_1
        - snippet_name_2
        - ...
    - ...

    Lines that mention `<include_snippet` but cannot be decoded by `decode_xml_snippet_tag` (it returns None) are ignored.

    :param lines: Lines of the source markdown file.
    :return: Dict mapping each file name to the list of snippet names requested from it, in order of appearance.
    """
    snippet_infos = dict()

    for line in lines:
        if "<include_snippet" not in line:
            continue

        snippet_info = decode_xml_snippet_tag(line)
        if snippet_info is None:
            # The line only mentions the tag (e.g. in prose); there is nothing to require
            continue

        # Group the snippets by file
        snippet_infos.setdefault(snippet_info["file"], []).append(snippet_info["name"])

    return snippet_infos


def insert_snippets_in_markdown(source_lines: list, snippets: dict):
    """
    Inserts the required snippets into the markdown file.

    Every line holding a decodable `<include_snippet .../>` tag is replaced by the lines of the referenced snippet.
    All other lines are copied verbatim, including lines that mention the tag but for which `decode_xml_snippet_tag` returns None.

    :param source_lines: Lines of the source markdown file.
    :param snippets: Dict of file name and dict of snippet names and contents.
    :return: The lines of the output markdown file.
    :raises KeyError: If a requested file or snippet name is not present in `snippets`.
    """
    processed_lines = []

    for line in source_lines:
        snippet_info = None
        if "<include_snippet" in line:
            # Parse and decode the XML snippet tag
            snippet_info = decode_xml_snippet_tag(line)

        if snippet_info is None:
            # Append the source line verbatim
            processed_lines.append(line)
            continue

        file_path = snippet_info["file"]
        name = snippet_info["name"]
        try:
            contents = snippets[file_path][name]
        except KeyError:
            raise KeyError(f'Snippet "{name}" not found in file "{file_path}"') from None

        if isinstance(contents, str):
            # Split into lines: extending a list with a string would append single characters
            processed_lines.extend(contents.splitlines(keepends=True))
        else:
            # Contents already given as an iterable of lines
            processed_lines.extend(contents)

    return processed_lines


def print_snippet_information(snippet_infos: dict):
    """
    Displays the snippets requested by the source markdown, grouped by file.

    A fixed header is printed first, then each file name in double quotes followed by
    its snippet names, indented by four spaces and also in double quotes.

    :param snippet_infos: Dict of file name and lists of snippet names.
    :type snippet_infos: dict
    """
    report = ["Required code snippets", "----------------------"]
    for file, names in snippet_infos.items():
        report.append(f"\"{file}\"")
        report.extend(f"    \"{name}\"" for name in names)
    # One print call; the line breaks are the same as with one print per line
    print("\n".join(report))


def print_snippet_summary(snippets: dict):
    """
    Displays the snippets that were actually found in the code files, grouped by file.

    Output starts with a blank line and a fixed header, then lists each file name in double
    quotes followed by the names of its snippets, indented by four spaces and quoted as well.

    :param snippets: Dict of file name and dict of snippet names and contents.
    :type snippets: dict
    """
    for header_line in ("\nFound code snippets", "-------------------"):
        print(header_line)

    for file, found in snippets.items():
        print(f"\"{file}\"")
        # Iterating the inner mapping yields the snippet names only
        for name in found:
            print(f"    \"{name}\"")


def process_markdown_file(args):
    """
    Processes the markdown file by finding and replacing all the `<include_snippet/>` tags under the following form:

    ```
    <include_snippet name="snippet_name" file="path/to/file.cpp"/>
    ```

    Beware that the `<include_snippet/>` tag is case-sensitive ! It must be all lowercase to be correctly detected.

    The `file` attribute of each tag is opened as written. The destination file is only opened once the whole
    output has been computed, so the source and destination may be the same path, and a failure while reading
    the snippets does not leave a truncated destination file behind.

    :param args: Parsed program arguments, providing the `source` and `destination` paths.
    """
    # Read source markdown file
    with open(args.source) as f_in:
        source_lines = f_in.readlines()

    # Extract required code snippets from markdown file
    snippet_infos = extract_snippet_infos(source_lines)
    print_snippet_information(snippet_infos)

    # Extract actual snippets
    snippets = {file_path: extract_code_snippets(file_path) for file_path in snippet_infos}
    print_snippet_summary(snippets)

    # Replace the snippet calls by the actual content
    output_lines = insert_snippets_in_markdown(source_lines, snippets)

    # Write the processed file (opened last: opening for writing truncates the file immediately)
    with open(args.destination, "w") as f_out:
        f_out.writelines(output_lines)


def parse_args():
    """
    Builds the command-line parser and parses the program arguments.

    Two mandatory positional arguments are declared, in this order: `source` and `destination`.

    :return: The namespace produced by argparse, with the `source` and `destination` attributes.
    """
    positional_arguments = (
        ('source', 'Source path (mandatory)'),
        ('destination', 'Destination path (mandatory)'),
    )

    cli = argparse.ArgumentParser(description='Markdown code snippet injector.')
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, type=str, help=argument_help)

    return cli.parse_args()


if __name__ == "__main__":
    # Script entry point: parse the command-line arguments, then process the markdown file
    args = parse_args()
    process_markdown_file(args)
Initial commit. 2026-02-04 14:09:41 +01:00			`import argparse`
			`import re`
			`import xml.etree.ElementTree as ET`


			`def extract_code_snippets(file_path: str) -> dict:`
			`"""`
			`Extracts code snippets delimited by "BEGIN CODE SNIPPET XYZ" and "END CODE SNIPPET" from the given file, with XYZ being some user-defined name.`

			`All snippets are extracted and returned as a dictionary with the snippet name as the key, and the snippet contents as the value.`

			`:param file_path: File path in which the tag should be found.`
			`"""`
			`snippets = dict()`
			`with open(file_path) as f_in:`
			`lines = f_in.readlines()`
			`snippet_name = None`
			`snippet_start_line = None`
			`for i, line in enumerate(lines):`
			`if "BEGIN CODE SNIPPET" in line:`
			`snippet_name = line.split("BEGIN CODE SNIPPET")[1].strip()`
			`snippet_start_line = i + 1`
			`if "END CODE SNIPPET" in line:`
			`snippets[snippet_name] = "".join(lines[snippet_start_line:i])`
			`return snippets`


			`def decode_xml_snippet_tag(line):`
			`"""`
			`Extracts and decodes the XML snippet tag under the following form :`
			```
			`<include_snippet name="snippet_name" file="path/to/file.cpp"/>`
			```

			`:param line: Line containing the XML tag.`
			`"""`
			`# Extract the XML tag from the line`
			`the_match = re.match("(<include_snippet+.+\\/>)", line)`
			`if the_match:`
			`xml_string = the_match.group(1)`
			`else:`
			`return None`

			`# Parse the XML string`
			`root = ET.fromstring(f'<root>{xml_string}</root>') # Wrap in a root tag if needed`

			`# Iterate over all include_snippet tags`
			`for elem in root.findall('include_snippet'):`
			`name = elem.get('name')`
			`file = elem.get('file')`

			`return {"name": name, "file": file}`


			`def extract_snippet_infos(lines) -> dict:`
			`"""`
			`Extracts the names of the snippets required from the source markdown file.`

			`Returns a dictionary of lists :`
			`- file_name_1`
			`- snippet_name_1`
			`- snippet_name_2`
			`- ...`
			`- file_name_2`
			`- snippet_name_1`
			`- snippet_name_2`
			`- ...`
			`- ...`
			`"""`
			`snippet_infos = dict()`
			`snippet_infos_list = []`

			`for line in lines:`
			`if "<include_snippet" in line.strip():`
			`snippet_info = decode_xml_snippet_tag(line)`
			`snippet_infos_list.append(snippet_info)`

			`# Group the snippets by file`
			`for snippet_info in snippet_infos_list:`
			`if snippet_info["file"] not in snippet_infos.keys():`
			`snippet_infos[snippet_info["file"]] = []`
			`snippet_infos[snippet_info["file"]].append(snippet_info["name"])`

			`return snippet_infos`


			`def insert_snippets_in_markdown(source_lines: list, snippets: dict):`
			`"""`
			`Inserts the required snippets into the markdown file.`

			`Returns the lines of the output markdown file.`
			`"""`
			`processed_lines = []`

			`for line in source_lines:`
			`if "<include_snippet" in line.strip():`
			`# Parse and decode the XML snippet tag`
			`snippet_info = decode_xml_snippet_tag(line)`

			`# Append the snippet contents to the output lines`
			`processed_lines.extend(snippets[snippet_info["file"]][snippet_info["name"]])`
			`else:`
			`# Append the source line verbatim`
			`processed_lines.append(line)`

			`return processed_lines`


			`def print_snippet_information(snippet_infos: dict):`
			`"""`
			`Prints the snippet information parsed from the source markdown.`

			`:param snippet_infos: Dict of file name and lists of snippet names.`
			`:type snippet_infos: dict`
			`"""`
			`print("Required code snippets")`
			`print("----------------------")`
			`for file in snippet_infos:`
			`print(f"\"{file}\"")`
			`for name in snippet_infos[file]:`
			`print(f" \"{name}\"")`


			`def print_snippet_summary(snippets: dict):`
			`"""`
			`Prints the snippet information parsed from the code files.`

			`:param snippets: Dict of file name and dict of snippet names and contents.`
			`:type snippets: dict`
			`"""`
			`print("\nFound code snippets")`
			`print("-------------------")`
			`for file in snippets:`
			`print(f"\"{file}\"")`
			`for name in snippets[file]:`
			`print(f" \"{name}\"")`


			`def process_markdown_file(args):`
			`"""`
			Processes the markdown file by finding and replacing all the `<include_snippet/>` tags under the following form:

			```
			`<include_snippet name="snippet_name" file="path/to/file.cpp"/>`
			```

			Beware that the `<include_snippet/>` tag is case-sensitive ! It must be all lowercase to be correctly detected.

			`:param args: Parsed program arguments.`
			`"""`
			`with open(args.source) as f_in:`
			`with open(args.destination, "w+") as f_out:`
			`# Read source markdown file`
			`source_lines = f_in.readlines()`

			`# Extract required code snippets from markdown file`
			`snippet_infos = extract_snippet_infos(source_lines)`
			`print_snippet_information(snippet_infos)`

			`# Extract actual snippets`
			`snippets = dict()`
			`for file_path in snippet_infos.keys():`
			`snippets[file_path] = extract_code_snippets(file_path)`
			`print_snippet_summary(snippets)`

			`# Replace the snippet calls by the actual content`
			`output_lines = insert_snippets_in_markdown(source_lines, snippets)`

			`# Write the processed file`
			`f_out.writelines(output_lines)`


			`def parse_args():`
			`"""`
			`Parses the arguments of the program.`
			`"""`
			`# Create the argument parser`
			`parser = argparse.ArgumentParser(description='Markdown code snippet injector.')`

			`# Add the positional arguments`
			`parser.add_argument('source', type=str, help='Source path (mandatory)')`
			`parser.add_argument('destination', type=str, help='Destination path (mandatory)')`

			`return parser.parse_args()`


			`if __name__ == "__main__":`
			`args = parse_args()`
			`process_markdown_file(args)`