"""Report generation functions for EEG data processing.
This module provides standalone functions for generating comprehensive
processing reports and summaries.
"""
import json
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Union
import mne
import numpy as np
[docs]
def generate_processing_report(
    raw_original: mne.io.Raw,
    raw_cleaned: mne.io.Raw,
    processing_steps: List[Dict],
    output_path: Union[str, Path],
    include_plots: bool = True,
    title: str = "EEG Processing Report",
    verbose: Optional[bool] = None,
) -> str:
    """Write an HTML report describing an EEG processing run.

    The report is assembled from statistics comparing the two recordings and
    from the supplied list of processing steps, then saved to ``output_path``.
    Missing parent directories of ``output_path`` are created.

    Parameters
    ----------
    raw_original : mne.io.Raw
        Recording before processing.
    raw_cleaned : mne.io.Raw
        Recording after processing.
    processing_steps : list of dict
        One dict per pipeline step. Every dict must provide the keys
        ``'step_name'``, ``'parameters'``, ``'execution_time'`` and
        ``'description'``.
    output_path : str or Path
        Destination of the HTML file.
    include_plots : bool, default True
        Forwarded to the HTML builder to toggle the visualization section.
    title : str, default "EEG Processing Report"
        Heading and page title of the report.
    verbose : bool or None, default None
        When truthy, print the location of the saved report.

    Returns
    -------
    report_path : str
        ``output_path`` converted to a string.

    Raises
    ------
    TypeError
        If either recording is not an ``mne.io.BaseRaw`` instance, or
        ``processing_steps`` is not a list.
    ValueError
        If an entry of ``processing_steps`` is not a dict or lacks one of the
        required keys.
    RuntimeError
        If anything fails while computing statistics, building the HTML or
        writing the file; the original exception is chained as the cause.

    Examples
    --------
    >>> steps = [{'step_name': 'Filtering', 'parameters': {'low_freq': 0.1},
    ...           'execution_time': 2.3, 'description': 'Applied filter'}]
    >>> report_path = generate_processing_report(raw_original, raw_cleaned, steps, "report.html")

    See Also
    --------
    plot_raw_comparison : Create before/after comparison plots
    plot_ica_components : Visualize ICA components
    create_processing_summary : Create JSON processing summary
    """
    # Type checks on the two recordings, in signature order.
    for arg_name, candidate in (
        ("raw_original", raw_original),
        ("raw_cleaned", raw_cleaned),
    ):
        if not isinstance(candidate, mne.io.BaseRaw):
            raise TypeError(
                f"{arg_name} must be an MNE Raw object, got {type(candidate).__name__}"
            )

    if not isinstance(processing_steps, list):
        raise TypeError("processing_steps must be a list of dictionaries")

    # Each step must be a dict carrying all expected metadata keys; the first
    # missing key (in the order below) is the one reported.
    expected_keys = ("step_name", "parameters", "execution_time", "description")
    for index, entry in enumerate(processing_steps):
        if not isinstance(entry, dict):
            raise ValueError(f"processing_steps[{index}] must be a dictionary")
        absent = [name for name in expected_keys if name not in entry]
        if absent:
            raise ValueError(
                f"processing_steps[{index}] missing required key: {absent[0]}"
            )

    try:
        report_file = Path(output_path)
        target_dir = report_file.parent
        target_dir.mkdir(parents=True, exist_ok=True)

        # Before/after statistics feed the summary section of the report.
        comparison = _calculate_processing_stats(raw_original, raw_cleaned)

        document = _generate_html_report(
            raw_original=raw_original,
            raw_cleaned=raw_cleaned,
            processing_steps=processing_steps,
            stats=comparison,
            title=title,
            include_plots=include_plots,
            output_dir=target_dir,
        )

        report_file.write_text(document, encoding="utf-8")

        if verbose:
            print(f"Processing report saved to: {report_file}")
        return str(report_file)
    except Exception as exc:
        # Surface every failure of the generation phase as a RuntimeError
        # while keeping the underlying exception attached.
        raise RuntimeError(f"Failed to generate processing report: {exc!s}") from exc
def _calculate_processing_stats(
    raw_original: mne.io.Raw, raw_cleaned: mne.io.Raw
) -> Dict:
    """Calculate statistics comparing original and cleaned data.

    Parameters
    ----------
    raw_original : mne.io.Raw
        Recording before processing.
    raw_cleaned : mne.io.Raw
        Recording after processing.

    Returns
    -------
    stats : dict
        Dictionary with the keys:

        - ``original_duration`` / ``cleaned_duration``: last minus first
          entry of each recording's ``times``.
        - ``original_channels`` / ``cleaned_channels``: length of ``ch_names``.
        - ``sampling_rate``: ``info["sfreq"]`` of the original recording.
        - ``original_std_mean`` / ``cleaned_std_mean``: mean over rows of the
          standard deviation taken along axis 1 of ``get_data()``.
        - ``noise_reduction``: relative decrease of the mean standard
          deviation in percent; ``0.0`` when the original value is zero.
        - ``original_annotations`` / ``cleaned_annotations``: annotation counts.
        - ``bad_annotations``: number of annotations in the cleaned recording
          whose description starts with ``"BAD"``.
    """
    stats = {}

    # Basic info
    stats["original_duration"] = raw_original.times[-1] - raw_original.times[0]
    stats["cleaned_duration"] = raw_cleaned.times[-1] - raw_cleaned.times[0]
    stats["original_channels"] = len(raw_original.ch_names)
    stats["cleaned_channels"] = len(raw_cleaned.ch_names)
    stats["sampling_rate"] = raw_original.info["sfreq"]

    # Data quality metrics
    orig_data = raw_original.get_data()
    clean_data = raw_cleaned.get_data()
    original_std_mean = float(np.mean(np.std(orig_data, axis=1)))
    cleaned_std_mean = float(np.mean(np.std(clean_data, axis=1)))
    stats["original_std_mean"] = original_std_mean
    stats["cleaned_std_mean"] = cleaned_std_mean

    # A flat (zero-variance) original recording has no spread to reduce;
    # report 0% instead of dividing by zero.
    if original_std_mean == 0:
        stats["noise_reduction"] = 0.0
    else:
        stats["noise_reduction"] = float(
            (original_std_mean - cleaned_std_mean) / original_std_mean * 100
        )

    # Annotation counts
    stats["original_annotations"] = len(raw_original.annotations)
    stats["cleaned_annotations"] = len(raw_cleaned.annotations)
    stats["bad_annotations"] = sum(
        1 for ann in raw_cleaned.annotations if ann["description"].startswith("BAD")
    )
    return stats
def _generate_html_report(
    raw_original: mne.io.Raw,
    raw_cleaned: mne.io.Raw,
    processing_steps: List[Dict],
    stats: Dict,
    title: str,
    include_plots: bool,
    output_dir: Path,
) -> str:
    """Generate the HTML content for the processing report.

    Caller-supplied text (the title, step names, step descriptions and the
    rendered parameter list) is HTML-escaped before being inserted, so
    characters such as ``<`` or ``&`` show up literally instead of breaking
    the markup.

    Parameters
    ----------
    raw_original, raw_cleaned : mne.io.Raw
        Part of the interface; not read by this function.
    processing_steps : list of dict
        Steps to list in the pipeline table. Each dict is read for the keys
        ``'step_name'``, ``'description'``, ``'parameters'`` (a dict) and
        ``'execution_time'`` (a number, formatted with two decimals).
    stats : dict
        Statistics to display. The keys read are ``original_channels``,
        ``cleaned_channels``, ``original_duration``, ``sampling_rate``,
        ``noise_reduction``, ``bad_annotations``, ``original_std_mean``,
        ``cleaned_std_mean``, ``original_annotations`` and
        ``cleaned_annotations``.
    title : str
        Page title and main heading.
    include_plots : bool
        When true, a "Visualizations" section with a placeholder for the raw
        data comparison plot is appended.
    output_dir : Path
        Part of the interface; not read by this function.

    Returns
    -------
    html : str
        The complete HTML document.
    """
    # Function-scope import so the block needs no change to the file's
    # import section.
    from html import escape

    page_title = escape(str(title))

    # Kept as a plain (non-f) string so CSS braces need no doubling.
    css = """
        body {
            font-family: Arial, sans-serif;
            line-height: 1.6;
            margin: 0;
            padding: 20px;
            background-color: #f4f4f4;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
            background-color: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        }
        h1, h2, h3 {
            color: #333;
        }
        .summary-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
            margin: 20px 0;
        }
        .stat-box {
            background-color: #f8f9fa;
            border: 1px solid #dee2e6;
            border-radius: 5px;
            padding: 15px;
            text-align: center;
        }
        .stat-value {
            font-size: 2em;
            font-weight: bold;
            color: #007bff;
        }
        .stat-label {
            color: #6c757d;
            font-size: 0.9em;
        }
        .step-table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
        }
        .step-table th, .step-table td {
            border: 1px solid #ddd;
            padding: 12px;
            text-align: left;
        }
        .step-table th {
            background-color: #f2f2f2;
            font-weight: bold;
        }
        .step-table tr:nth-child(even) {
            background-color: #f9f9f9;
        }
        .timestamp {
            color: #6c757d;
            font-size: 0.9em;
            text-align: right;
            margin-top: 20px;
        }
        .plot-container {
            text-align: center;
            margin: 20px 0;
        }
        .plot-container img {
            max-width: 100%;
            height: auto;
            border: 1px solid #ddd;
            border-radius: 5px;
        }
    """

    # (displayed value, label) pairs for the executive summary grid.
    summary_boxes = [
        (f"{stats['original_channels']}", "Original Channels"),
        (f"{stats['cleaned_channels']}", "Final Channels"),
        (f"{stats['original_duration']:.1f}s", "Duration"),
        (f"{stats['sampling_rate']:.0f} Hz", "Sampling Rate"),
        (f"{stats['noise_reduction']:.1f}%", "Noise Reduction"),
        (f"{stats['bad_annotations']}", "Bad Segments"),
    ]
    summary_html = "".join(
        f"""
            <div class="stat-box">
                <div class="stat-value">{value}</div>
                <div class="stat-label">{label}</div>
            </div>"""
        for value, label in summary_boxes
    )

    # One table row per processing step; accumulate the total run time.
    step_rows = []
    total_time = 0
    for step in processing_steps:
        params_str = ", ".join(f"{k}={v}" for k, v in step["parameters"].items())
        step_rows.append(
            f"""
                <tr>
                    <td><strong>{escape(str(step['step_name']))}</strong></td>
                    <td>{escape(str(step['description']))}</td>
                    <td><code>{escape(params_str)}</code></td>
                    <td>{step['execution_time']:.2f}s</td>
                </tr>"""
        )
        total_time += step["execution_time"]

    # Signed formatting shows "+3" / "-2" / "+0"; a hard-coded "+" prefix
    # would render "+-2" when annotations were removed.
    annotation_delta = stats["cleaned_annotations"] - stats["original_annotations"]

    # Collect the document in pieces and join once at the end.
    parts = [
        f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{page_title}</title>
    <style>{css}</style>
</head>
<body>
    <div class="container">
        <h1>{page_title}</h1>
        <h2>Executive Summary</h2>
        <div class="summary-grid">{summary_html}
        </div>
        <h2>Processing Pipeline</h2>
        <table class="step-table">
            <thead>
                <tr>
                    <th>Step</th>
                    <th>Description</th>
                    <th>Parameters</th>
                    <th>Execution Time</th>
                </tr>
            </thead>
            <tbody>""",
        "".join(step_rows),
        f"""
            </tbody>
        </table>
        <p><strong>Total Processing Time:</strong> {total_time:.2f} seconds</p>
        <h2>Data Quality Metrics</h2>
        <table class="step-table">
            <thead>
                <tr>
                    <th>Metric</th>
                    <th>Original</th>
                    <th>Cleaned</th>
                    <th>Change</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Mean Channel Standard Deviation</td>
                    <td>{stats['original_std_mean']:.2e}</td>
                    <td>{stats['cleaned_std_mean']:.2e}</td>
                    <td>{stats['noise_reduction']:.1f}% reduction</td>
                </tr>
                <tr>
                    <td>Number of Annotations</td>
                    <td>{stats['original_annotations']}</td>
                    <td>{stats['cleaned_annotations']}</td>
                    <td>{annotation_delta:+}</td>
                </tr>
                <tr>
                    <td>Bad Segment Annotations</td>
                    <td>0</td>
                    <td>{stats['bad_annotations']}</td>
                    <td>{stats['bad_annotations']:+}</td>
                </tr>
            </tbody>
        </table>
""",
    ]

    # Optional placeholder section for plots.
    if include_plots:
        parts.append(
            """
        <h2>Visualizations</h2>
        <div class="plot-container">
            <h3>Raw Data Comparison</h3>
            <p>Red: Original Data, Black: Cleaned Data</p>
            <!-- Plot will be added here if generated -->
        </div>
"""
        )

    # Footer with the generation time, then close the document.
    parts.append(
        f"""
        <div class="timestamp">
            Report generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
        </div>
    </div>
</body>
</html>
"""
    )
    return "".join(parts)
[docs]
def create_processing_summary(
    processing_steps: List[Dict], output_path: Optional[Union[str, Path]] = None
) -> Dict:
    """Create a JSON summary of processing steps.

    Parameters
    ----------
    processing_steps : list of dict
        List of processing steps with metadata. Each dict must contain an
        ``'execution_time'`` entry, which is summed into ``total_time``.
    output_path : str, Path, or None, default None
        Path to save JSON summary. If None, returns dict only. Missing
        parent directories are created.

    Returns
    -------
    summary : dict
        Dictionary with the keys ``total_steps``, ``total_time``, ``steps``
        (the ``processing_steps`` list itself, not a copy) and
        ``generated_at`` (ISO-formatted local timestamp).

    Raises
    ------
    TypeError
        If ``output_path`` is given and the summary contains a value that
        cannot be converted to JSON. Nothing is written in that case.
    """
    summary = {
        "total_steps": len(processing_steps),
        "total_time": sum(step["execution_time"] for step in processing_steps),
        "steps": processing_steps,
        "generated_at": datetime.now().isoformat(),
    }
    if output_path is not None:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Serialize before touching the file so a serialization error does
        # not leave a truncated summary behind.
        payload = json.dumps(summary, indent=2, default=_summary_json_default)
        # Explicit encoding keeps the output independent of the platform
        # default, consistent with how the HTML report is written.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(payload)
    return summary


def _summary_json_default(value):
    """Convert NumPy and Path values to JSON-compatible Python objects."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, Path):
        return str(value)
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )