# markdown_snippet_injector/markdown_snippet_injector.py
# (text captured from a web code viewer; timestamp shown by the viewer: 2026-02-04 14:09:41 +01:00)
import argparse
import re
import xml.etree.ElementTree as ET
def extract_code_snippets(file_path: str) -> dict:
    """
    Extracts code snippets delimited by "BEGIN CODE SNIPPET XYZ" and "END CODE SNIPPET" from the given file,
    with XYZ being some user-defined name.

    The two marker lines themselves are not part of the snippet. An "END CODE SNIPPET" marker that does not
    close a previously opened snippet is ignored. A snippet that is opened but never closed is not returned.
    If the same name is used twice, the last occurrence wins.

    :param file_path: Path of the file in which the snippet markers are searched.
    :return: Dictionary with the snippet name as the key, and the snippet contents (one string, line endings
             preserved) as the value.
    """
    snippets = dict()
    with open(file_path) as f_in:
        lines = f_in.readlines()

    snippet_name = None
    snippet_start_line = None
    for i, line in enumerate(lines):
        if "BEGIN CODE SNIPPET" in line:
            # Everything after the marker on that line is the snippet name
            snippet_name = line.split("BEGIN CODE SNIPPET")[1].strip()
            snippet_start_line = i + 1
        if "END CODE SNIPPET" in line and snippet_start_line is not None:
            snippets[snippet_name] = "".join(lines[snippet_start_line:i])
            # Close the snippet so that a stray END marker cannot reuse stale state
            snippet_name = None
            snippet_start_line = None
    return snippets
def decode_xml_snippet_tag(line):
    """
    Extracts and decodes the XML snippet tag under the following form :
    ```
    <include_snippet name="snippet_name" file="path/to/file.cpp"/>
    ```
    The tag may appear anywhere in the line (leading whitespace or surrounding text is allowed).
    If several tags are present, only the first one is decoded.

    :param line: Line containing the XML tag.
    :return: Dictionary with the keys "name" and "file" (a value is None when the attribute is absent),
             or None when the line contains no such tag.
    :raises xml.etree.ElementTree.ParseError: If the tag is found but is not well-formed XML.
    """
    # Search the whole line (not only its start) and stop at the first "/>" so that
    # text following the tag is never handed to the XML parser.
    the_match = re.search(r"<include_snippet\s.+?/>", line)
    if the_match is None:
        return None

    # The matched text is a single self-closing element, so it can be parsed as the document root
    elem = ET.fromstring(the_match.group(0))
    return {"name": elem.get("name"), "file": elem.get("file")}
def extract_snippet_infos(lines) -> dict:
    """
    Extracts the names of the snippets required from the source markdown file.

    Returns a dictionary of lists :
    - file_name_1
        - snippet_name_1
        - snippet_name_2
        - ...
    - file_name_2
        - snippet_name_1
        - snippet_name_2
        - ...
    - ...

    Files and snippet names are listed in their order of appearance. Lines that mention
    `<include_snippet` but for which `decode_xml_snippet_tag` returns None are skipped.

    :param lines: Lines of the source markdown file.
    :return: Dictionary mapping each file name to the list of snippet names requested from it.
    """
    snippet_infos = dict()
    for line in lines:
        if "<include_snippet" not in line:
            continue
        snippet_info = decode_xml_snippet_tag(line)
        if snippet_info is None:
            # The decoder could not locate a complete tag: nothing to register
            continue
        # Group the snippets by file
        snippet_infos.setdefault(snippet_info["file"], []).append(snippet_info["name"])
    return snippet_infos
def insert_snippets_in_markdown(source_lines: list, snippets: dict):
    """
    Inserts the required snippets into the markdown file.

    Every line holding a decodable `<include_snippet .../>` tag is replaced as a whole by the lines of
    the referenced snippet. All other lines are copied verbatim, including lines that mention
    `<include_snippet` but for which `decode_xml_snippet_tag` returns None.

    :param source_lines: Lines of the source markdown file.
    :param snippets: Dict of file name and dict of snippet names and contents.
    :return: The lines of the output markdown file.
    :raises KeyError: If a requested file or snippet name is not present in `snippets`.
    """
    processed_lines = []
    for line in source_lines:
        snippet_info = None
        if "<include_snippet" in line:
            # Parse and decode the XML snippet tag
            snippet_info = decode_xml_snippet_tag(line)

        if snippet_info is None:
            # Append the source line verbatim
            processed_lines.append(line)
            continue

        try:
            contents = snippets[snippet_info["file"]][snippet_info["name"]]
        except KeyError:
            raise KeyError(
                f"Snippet {snippet_info['name']!r} not found in file {snippet_info['file']!r}"
            ) from None

        # The snippet is stored as one string: split it back into lines (line endings kept)
        # so that the result really is a list of lines and not a list of characters.
        processed_lines.extend(contents.splitlines(keepends=True))
    return processed_lines
def print_snippet_information(snippet_infos: dict):
    """
    Displays which snippets the source markdown asks for, grouped by file.

    A two-line header is printed first, then each file name between double quotes,
    followed by the quoted names of the snippets requested from that file.

    :param snippet_infos: Dict of file name and lists of snippet names.
    :type snippet_infos: dict
    """
    for header_line in ("Required code snippets", "----------------------"):
        print(header_line)

    for file_name, snippet_names in snippet_infos.items():
        print('"' + file_name + '"')
        for snippet_name in snippet_names:
            print(' "' + snippet_name + '"')
def print_snippet_summary(snippets: dict):
    """
    Displays which snippets were actually found in the code files, grouped by file.

    A header (preceded by an empty line) is printed first, then each file name between
    double quotes, followed by the quoted names of the snippets found in that file.

    :param snippets: Dict of file name and dict of snippet names and contents.
    :type snippets: dict
    """
    for header_line in ("\nFound code snippets", "-------------------"):
        print(header_line)

    for file_name, file_snippets in snippets.items():
        print('"' + file_name + '"')
        # Only the snippet names are listed, not their contents
        for snippet_name in file_snippets:
            print(' "' + snippet_name + '"')
def process_markdown_file(args):
    """
    Processes the markdown file by finding and replacing all the `<include_snippet/>` tags under the following form:
    ```
    <include_snippet name="snippet_name" file="path/to/file.cpp"/>
    ```
    Beware that the `<include_snippet/>` tag is case-sensitive ! It must be all lowercase to be correctly detected.

    The source is read completely and all snippets are resolved before the destination is opened for
    writing. As a consequence the destination may be the same path as the source, and a failure while
    resolving snippets leaves an existing destination file untouched.

    :param args: Parsed program arguments; the `source` and `destination` attributes are used.
    """
    # Read source markdown file
    with open(args.source) as f_in:
        source_lines = f_in.readlines()

    # Extract required code snippets from markdown file
    snippet_infos = extract_snippet_infos(source_lines)
    print_snippet_information(snippet_infos)

    # Extract actual snippets
    snippets = dict()
    for file_path in snippet_infos:
        snippets[file_path] = extract_code_snippets(file_path)
    print_snippet_summary(snippets)

    # Replace the snippet calls by the actual content
    output_lines = insert_snippets_in_markdown(source_lines, snippets)

    # Write the processed file (opened only now, so nothing is truncated before the work succeeded)
    with open(args.destination, "w") as f_out:
        f_out.writelines(output_lines)
def parse_args():
    """
    Builds the command-line parser and parses the program arguments.

    Two mandatory positional arguments are declared, in this order: `source` and `destination`.

    :return: The namespace produced by argparse, holding `source` and `destination`.
    """
    # Positional arguments, in command-line order: (name, help text)
    positional_arguments = (
        ('source', 'Source path (mandatory)'),
        ('destination', 'Destination path (mandatory)'),
    )

    parser = argparse.ArgumentParser(description='Markdown code snippet injector.')
    for argument_name, argument_help in positional_arguments:
        parser.add_argument(argument_name, type=str, help=argument_help)

    return parser.parse_args()
if __name__ == "__main__":
    # Script entry point: parse the command line, then process the markdown file accordingly
    process_markdown_file(parse_args())