137 lines
4.7 KiB
Python
Executable file
137 lines
4.7 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
import os
import re
from datetime import datetime
from pathlib import Path
|
|
|
|
|
|
def version_parse(v):
    """Convert a dotted section number into a tuple usable as a sort key.

    Args:
        v: Dotted numeric string such as ``"8"``, ``"5.1"`` or ``"5.10"``.

    Returns:
        Tuple of ints, e.g. ``(5, 10)``, so that ``"5.10"`` orders after
        ``"5.9"`` instead of before it as plain string comparison would.

    Raises:
        ValueError: If a non-empty component is not a base-10 integer.
    """
    # The regexes in this file capture "[\d\.]+", which can include a stray
    # leading/trailing/doubled dot (e.g. "5.1."). Skip the resulting empty
    # components instead of crashing on int("").
    return tuple(int(part) for part in v.split(".") if part)
|
|
|
|
|
|
# Configuration
# Folders (relative to the working directory) that hold the per-section
# Markdown files, and the filename patterns used to recognise them. Group 1
# of each filename pattern captures the dotted section number.
EN_FOLDER = "ISO27002-EN-2022"
NL_FOLDER = "ISO27002-NL-2022"
EN_PATTERN = re.compile(r"ISO_27002_2022_([\d\.]+)_OT.*\.md")
NL_PATTERN = re.compile(r"ISO_27002_2022_NL_([\d\.]+)_BT.*\.md")

# Reference patterns
# Each pattern has two alternatives with one capture group apiece, so exactly
# one group is non-empty per match:
#   EN: "(see 5.1)"                       or "... in 5.1)"
#   NL: "(zie 5.1)" / "(zie [5.1](link))" or "... in 5.1)"
EN_REF_PATTERN = re.compile(r"\(see ([\d\.]+)\)|in ([\d\.]+)\)")
NL_REF_PATTERN = re.compile(r"\(zie \[?([\d\.]+)\]?\(?[^)]*\)?\)|in ([\d\.]+)\)")
|
|
|
|
|
|
def extract_references(file_path, pattern):
    """Extract all reference numbers from a file.

    Args:
        file_path: Path of the UTF-8 text file to scan.
        pattern: Compiled regex with one or more capture groups; for every
            match the first non-empty group is taken as the reference number.

    Returns:
        Sorted list of the unique reference strings found, ordered
        numerically by ``version_parse``. An empty list is returned (after
        printing a message) when the file cannot be read or decoded.
    """
    # Keep the try body to the I/O only, and catch just read/decode failures
    # so that genuine programming errors are not silently swallowed.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading {file_path}: {e}")
        return []

    references = set()
    for match in pattern.finditer(content):
        # Exactly one alternative (and thus one group) participates per match.
        raw = next((group for group in match.groups() if group), "")
        # "[\d\.]+" may swallow sentence punctuation ("5.1."); drop stray dots
        # so the same section is not reported under two different spellings.
        ref = raw.strip(".")
        if ref:
            references.add(ref)

    return sorted(references, key=version_parse)
|
|
|
|
|
|
def _index_files(folder, pattern):
    """Map section number -> file path for the ``.md`` files in *folder* whose name matches *pattern*."""
    indexed = {}
    for filename in os.listdir(folder):
        if not filename.endswith(".md"):
            continue
        match = pattern.match(filename)
        if match:
            indexed[match.group(1)] = os.path.join(folder, filename)
    return indexed


def _format_refs(refs):
    """Render a list of reference strings as one Markdown table cell."""
    return ", ".join(refs) if refs else "*None*"


def _render_mismatch(item):
    """Return the Markdown report lines describing a single mismatch entry."""
    lines = [f"## Section {item['section']}", ""]

    if item.get("missing_nl"):
        # Entries for a missing NL file carry no reference data; rendering
        # them like a regular mismatch would raise KeyError.
        lines.extend(
            [
                f"- **EN file**: `{item['en_file']}`",
                "- **NL file**: *missing*",
                "",
                "⚠️ **No matching NL file found for this section**",
                "",
                "---",
                "",
            ]
        )
        return lines

    lines.extend(
        [
            f"- **EN file**: `{item['en_file']}`",
            f"- **NL file**: `{item['nl_file']}`",
            "",
            "| Language | References |",
            "|----------|------------|",
            f"| English | {_format_refs(item['en_refs'])} |",
            f"| Dutch | {_format_refs(item['nl_refs'])} |",
            "",
        ]
    )
    if item["only_en"]:
        lines.append(f"✅ **Only in EN**: {', '.join(item['only_en'])}")
    if item["only_nl"]:
        lines.append(f"❌ **Only in NL**: {', '.join(item['only_nl'])}")
    lines.extend(["", "---", ""])
    return lines


def main():
    """Compare section cross-references between the EN and NL files and write a report.

    Indexes the Markdown files in ``EN_FOLDER`` and ``NL_FOLDER`` by section
    number, extracts the references from each EN/NL pair, and records every
    section whose reference sets differ or whose NL file is missing. The
    result is written to ``reference_mismatch_report.md`` in the current
    working directory and a short summary is printed.

    NOTE: only sections that have an EN file are examined; NL files without
    an EN counterpart are counted in the summary but not listed.

    Raises:
        OSError: If a source folder cannot be listed or the report cannot be
            written.
    """
    # Index files by section number
    en_files = _index_files(EN_FOLDER, EN_PATTERN)
    nl_files = _index_files(NL_FOLDER, NL_PATTERN)

    mismatches = []
    matched = 0

    # Compare each matching pair
    for section, en_path in en_files.items():
        nl_path = nl_files.get(section)
        if nl_path is None:
            mismatches.append(
                {
                    "section": section,
                    "missing_nl": True,
                    "en_file": Path(en_path).name,
                }
            )
            continue

        matched += 1
        en_refs = extract_references(en_path, EN_REF_PATTERN)
        nl_refs = extract_references(nl_path, NL_REF_PATTERN)
        en_set, nl_set = set(en_refs), set(nl_refs)

        if en_set != nl_set:
            mismatches.append(
                {
                    "section": section,
                    "en_file": Path(en_path).name,
                    "nl_file": Path(nl_path).name,
                    "en_refs": en_refs,
                    "nl_refs": nl_refs,
                    "only_en": sorted(en_set - nl_set, key=version_parse),
                    "only_nl": sorted(nl_set - en_set, key=version_parse),
                }
            )

    # Sort mismatches by section number properly (numeric, not lexicographic)
    mismatches.sort(key=lambda x: version_parse(x["section"]))

    # Local time with UTC offset at second precision: the same format as
    # `date -Iseconds`, without depending on a shell or GNU date.
    generated = datetime.now().astimezone().isoformat(timespec="seconds")

    # Generate Markdown report
    report_content = [
        "# ISO 27002:2022 Reference Mismatch Report",
        "",
        f"**Generated:** {generated}",
        "",
        "## Summary",
        f"- Total EN files: {len(en_files)}",
        f"- Total NL files: {len(nl_files)}",
        f"- Matched file pairs: {matched}",
        f"- Files with mismatched references: {len(mismatches)}",
        "",
        "---",
        "",
    ]
    for item in mismatches:
        report_content.extend(_render_mismatch(item))

    # Write markdown file
    with open("reference_mismatch_report.md", "w", encoding="utf-8") as f:
        f.write("\n".join(report_content))

    print("Report written to reference_mismatch_report.md")
    print(f"Found {len(mismatches)} mismatched files, sorted by section number")
|
|
|
|
|
|
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|