From c147ad8db79793a62613e6bdbfc3ca5f384eb0e3 Mon Sep 17 00:00:00 2001
From: Roy Eric <139973278+Randomidous@users.noreply.github.com>
Date: Wed, 23 Apr 2025 11:38:31 +0200
Subject: [PATCH 1/3] add function to concatenate HTML files, such as reports

---
 doc/changes/devel/13223.newfeature.rst |  1 +
 mne/report/report.py                   | 78 ++++++++++++++++++++++++++
 mne/report/tests/test_report.py        | 45 +++++++++++++++
 pyproject.toml                         |  1 +
 4 files changed, 125 insertions(+)
 create mode 100644 doc/changes/devel/13223.newfeature.rst

diff --git a/doc/changes/devel/13223.newfeature.rst b/doc/changes/devel/13223.newfeature.rst
new file mode 100644
index 00000000000..88f34b27cdc
--- /dev/null
+++ b/doc/changes/devel/13223.newfeature.rst
@@ -0,0 +1 @@
+Add functionality to concatenate HTML files, such as mne.Reports, by `Roy Eric Wieske`_.
diff --git a/mne/report/report.py b/mne/report/report.py
index be7c716cdc0..477ca8dffec 100644
--- a/mne/report/report.py
+++ b/mne/report/report.py
@@ -23,6 +23,7 @@
 from shutil import copyfile
 
 import numpy as np
+from bs4 import BeautifulSoup, Comment, Tag
 
 from .. import __version__ as MNE_VERSION
 from .._fiff.meas_info import Info, read_info
@@ -675,6 +676,83 @@ def open_report(fname, **params):
     return report
 
 
+def concatenate_reports(html_files, output_file):
+    """Concatenate multiple HTML files into one.
+
+    This function reads multiple HTML files, extracts their ``<head>`` and
+    ``<body>`` sections, and combines them into a single HTML file. A table of
+    contents (an element with ``id="toc"`` or ``class="toc"``) is dropped from
+    each input, identical ``<script>``, ``<link>`` and ``<style>`` tags are only
+    kept once, and duplicate IDs in the body are handled by removing them.
+
+    Parameters
+    ----------
+    html_files : list of str or Path
+        List of paths to the HTML files to be concatenated.
+    output_file : str or Path
+        Path to the output HTML file.
+
+    Returns
+    -------
+    final_html : BeautifulSoup
+        A BeautifulSoup object representing the combined HTML content.
+    """
+    final_html = BeautifulSoup("<html><head></head><body></body></html>", "lxml")
+    combined_head = final_html.head
+    combined_body = final_html.body
+    # Shared across all input files so that a resource or ID already taken from
+    # an earlier file is recognized as a duplicate in a later one.
+    seen_head_tags = set()
+    used_ids = set()
+
+    for file in html_files:
+        file = Path(file)
+        with open(file, encoding="utf-8") as f:
+            soup = BeautifulSoup(f, "lxml")
+
+        toc = soup.find(id="toc") or soup.find(class_="toc")
+        if toc is not None:
+            toc.decompose()
+
+        # handle head
+        if soup.head is not None:
+            for tag in soup.head.find_all(["script", "link", "style"]):
+                tag_str = str(tag)
+                if tag_str in seen_head_tags:
+                    continue
+                seen_head_tags.add(tag_str)
+                combined_head.append(tag)
+
+        # handle body
+        if soup.body is not None:
+            section = soup.new_tag("section")
+            section.append(soup.new_tag("hr"))
+            section.append(Comment(f"START {file.name}"))
+
+            # Iterate over a copy: ``append`` moves each node out of
+            # ``soup.body.contents``, which would otherwise skip every other node.
+            for node in list(soup.body.contents):
+                if isinstance(node, Tag):
+                    # Check the node itself as well as all of its descendants.
+                    for tag in [node, *node.find_all(True)]:
+                        id_ = tag.get("id")
+                        if not id_:
+                            continue
+                        if id_ in used_ids:
+                            del tag["id"]
+                        else:
+                            used_ids.add(id_)
+                section.append(node)
+
+            combined_body.append(section)
+
+    output_file = Path(output_file)
+    with output_file.open("w", encoding="utf-8") as f:
+        f.write(final_html.prettify())
+
+    return final_html
+
+
############################################################################### # HTML scan renderer diff --git a/mne/report/tests/test_report.py b/mne/report/tests/test_report.py index 66f4cd9e336..fb33c2ff891 100644 --- a/mne/report/tests/test_report.py +++ b/mne/report/tests/test_report.py @@ -13,6 +13,7 @@ import numpy as np import pytest +from bs4 import BeautifulSoup from matplotlib import pyplot as plt from mne import ( @@ -33,6 +34,7 @@ from mne.report.report import ( _ALLOWED_IMAGE_FORMATS, CONTENT_ORDER, + concatenate_reports, ) from mne.utils import Bunch, _record_warnings from mne.utils._testing import assert_object_equal @@ -634,6 +636,49 @@ def test_open_report(tmp_path): assert h5io.read_hdf5(hdf5, title="companion") == "test" +def test_concatenate_reports(tmp_path, sample_meg_dir): + """Test the concatenate_reports function.""" + raw_path = sample_meg_dir / "sample_audvis_raw.fif" + raw = read_raw_fif(raw_path, preload=True) + raw.set_annotations(None) + raw.crop(0, 20) + + with tmp_path as tmp_dir: + tmp_path = Path(tmp_dir) + + # Report 1 with custom content + report1 = Report(title="Report eeg_preprocessing #1") + report1.add_html( + "