From c147ad8db79793a62613e6bdbfc3ca5f384eb0e3 Mon Sep 17 00:00:00 2001 From: Roy Eric <139973278+Randomidous@users.noreply.github.com> Date: Wed, 23 Apr 2025 11:38:31 +0200 Subject: [PATCH 1/3] add function to concatenate HTML files, such as reports --- doc/changes/devel/13223.newfeature.rst | 1 + mne/report/report.py | 88 ++++++++++++++++++++++++++ mne/report/tests/test_report.py | 45 +++++++++++++ pyproject.toml | 1 + 4 files changed, 135 insertions(+) create mode 100644 doc/changes/devel/13223.newfeature.rst diff --git a/doc/changes/devel/13223.newfeature.rst b/doc/changes/devel/13223.newfeature.rst new file mode 100644 index 00000000000..88f34b27cdc --- /dev/null +++ b/doc/changes/devel/13223.newfeature.rst @@ -0,0 +1 @@ +Add functionality to concatenate HTML files, such as mne.Reports, by `Roy Eric Wieske`_. diff --git a/mne/report/report.py b/mne/report/report.py index be7c716cdc0..477ca8dffec 100644 --- a/mne/report/report.py +++ b/mne/report/report.py @@ -23,6 +23,7 @@ from shutil import copyfile import numpy as np +from bs4 import BeautifulSoup, Comment, Tag from .. import __version__ as MNE_VERSION from .._fiff.meas_info import Info, read_info @@ -675,6 +676,93 @@ def open_report(fname, **params): return report +def concatenate_reports(html_files, output_file): + """Concatenate multiple HTML files into one. + + This function reads multiple HTML files, extracts their <head> and <body> sections, + and combines them into a single HTML file. It also handles duplicate IDs in the body + by removing them. + + Parameters + ---------- + html_files : list of str or Path + List of paths to the HTML files to be concatenated. + output_file : str or Path + Path to the output HTML file. + + Returns + ------- + final_html : BeautifulSoup + A BeautifulSoup object representing the combined HTML content. 
+ """ + combined_head = BeautifulSoup("", "lxml").head + combined_body = BeautifulSoup("", "lxml").body + used_ids = set() + + for file in html_files: + file = Path(file) + with open(file, encoding="utf-8") as f: + soup = BeautifulSoup(f, "lxml") + + toc = soup.find(id="toc") or soup.find(class_="toc") + if toc: + toc.decompose() + + # handle head + if soup.head: + seen_styles = set() + seen_links = set() + seen_scripts = set() + + for tag in soup.head.find_all(["script", "link", "style"], recursive=True): + tag_str = str(tag) + if tag.name == "style": + if tag_str in seen_styles: + continue + seen_styles.add(tag_str) + elif tag.name == "link": + if tag_str in seen_links: + continue + seen_links.add(tag_str) + elif tag.name == "script": + if tag_str in seen_scripts: + continue + seen_scripts.add(tag_str) + + combined_head.append(tag) + + # handle body + if soup.body: + section = soup.new_tag("section") + section.append(soup.new_tag("hr")) + section.append(Comment(f"START {file.name}")) + + for tag in soup.body.contents: + if isinstance(tag, Tag): + for t in tag.find_all(True): + id_ = t.get("id") + if id_: + if id_ in used_ids: + del t["id"] + else: + used_ids.add(id_) + + section.append(tag) + + combined_body.append(section) + + # create final HTML + final_html = BeautifulSoup("", "lxml") + final_html.html.append(combined_head) + final_html.html.append(combined_body) + + output_file = Path(output_file) + with output_file.open("w", encoding="utf-8") as f: + f.write(final_html.prettify()) + + return final_html + + ############################################################################### # HTML scan renderer diff --git a/mne/report/tests/test_report.py b/mne/report/tests/test_report.py index 66f4cd9e336..fb33c2ff891 100644 --- a/mne/report/tests/test_report.py +++ b/mne/report/tests/test_report.py @@ -13,6 +13,7 @@ import numpy as np import pytest +from bs4 import BeautifulSoup from matplotlib import pyplot as plt from mne import ( @@ -33,6 +34,7 @@ from 
mne.report.report import ( _ALLOWED_IMAGE_FORMATS, CONTENT_ORDER, + concatenate_reports, ) from mne.utils import Bunch, _record_warnings from mne.utils._testing import assert_object_equal @@ -634,6 +636,49 @@ def test_open_report(tmp_path): assert h5io.read_hdf5(hdf5, title="companion") == "test" +def test_concatenate_reports(tmp_path, sample_meg_dir): + """Test the concatenate_reports function.""" + raw_path = sample_meg_dir / "sample_audvis_raw.fif" + raw = read_raw_fif(raw_path, preload=True) + raw.set_annotations(None) + raw.crop(0, 20) + + with tmp_path as tmp_dir: + tmp_path = Path(tmp_dir) + + # Report 1 with custom content + report1 = Report(title="Report eeg_preprocessing #1") + report1.add_html( + "
This is report one
", title="Note 1" + ) + report1.add_raw(raw, title="Raw data", psd=False) + report1.save(tmp_path / "report1.html", overwrite=True, open_browser=False) + + # Report 2 with different custom content + report2 = Report(title="Report eeg_preprocessing #2") + report2.add_html( + "
This is report two
", title="Note 2" + ) + report2.add_raw(raw, title="Raw data", psd=False) + report2.save(tmp_path / "report2.html", overwrite=True, open_browser=False) + + file1 = tmp_path / "report1.html" + file2 = tmp_path / "report2.html" + output_file = tmp_path / "combined.html" + + _ = concatenate_reports([file1, file2], output_file) + + assert output_file.exists() + + with open(output_file, encoding="utf-8") as f: + out_html = BeautifulSoup(f, "lxml") + + assert out_html.head is not None + assert out_html.body is not None + assert out_html.find(text=lambda t: "This is report one" in t) is not None + assert out_html.find(text=lambda t: "This is report two" in t) is not None + + def test_remove(): """Test removing figures from a report.""" r = Report() diff --git a/pyproject.toml b/pyproject.toml index 59e5a1703e3..60b95f257d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,7 @@ full = ["mne[full-no-qt]", "PyQt6 != 6.6.0", "PyQt6-Qt6 != 6.6.0, != 6.7.0"] # and mne[full-pyside6], which will install PySide6 instead of PyQt6. full-no-qt = [ "antio >= 0.5.0", + "beautifulsoup4", "darkdetect", "defusedxml", "dipy", From d54b34d4b120f07a4be198453ddd2feea26e9f98 Mon Sep 17 00:00:00 2001 From: Roy Eric <139973278+Randomidous@users.noreply.github.com> Date: Wed, 23 Apr 2025 14:20:39 +0200 Subject: [PATCH 2/3] update doc/changes/devel --- doc/changes/devel/13223.newfeature.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changes/devel/13223.newfeature.rst b/doc/changes/devel/13223.newfeature.rst index 88f34b27cdc..88f45bf44aa 100644 --- a/doc/changes/devel/13223.newfeature.rst +++ b/doc/changes/devel/13223.newfeature.rst @@ -1 +1 @@ -Add functionality to concatenate HTML files, such as mne.Reports, by `Roy Eric Wieske`_. +Add functionality to :func:`mne.report.concatenate_reports` concatenate HTML files, such as mne.Reports, by `Roy Eric Wieske`_. 
From f57c373af961b1e620ba86de5e2a403a8d574629 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Apr 2025 12:21:29 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 78c773e56bf..76c3187ab8e 100644 --- a/environment.yml +++ b/environment.yml @@ -5,6 +5,7 @@ channels: dependencies: - python >=3.10 - antio >=0.5.0 + - beautifulsoup4 - darkdetect - decorator - defusedxml