Source code for autoclean.functions.visualization.reports

"""Report generation functions for EEG data processing.

This module provides standalone functions for generating comprehensive
processing reports and summaries.
"""

import json
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Union

import mne
import numpy as np



[docs]
def generate_processing_report(
    raw_original: mne.io.Raw,
    raw_cleaned: mne.io.Raw,
    processing_steps: List[Dict],
    output_path: Union[str, Path],
    include_plots: bool = True,
    title: str = "EEG Processing Report",
    verbose: Optional[bool] = None,
) -> str:
    """Generate a comprehensive HTML processing report.

    This function creates a detailed HTML report summarizing the EEG processing
    pipeline, including statistics, processing steps, and optional visualizations.

    Parameters
    ----------
    raw_original : mne.io.Raw
        Original raw EEG data before processing.
    raw_cleaned : mne.io.Raw
        Cleaned raw EEG data after processing.
    processing_steps : list of dict
        List of processing steps with metadata. Each dict should contain:
        - 'step_name': Name of the processing step
        - 'parameters': Dict of parameters used
        - 'execution_time': Time taken for the step
        - 'description': Brief description of what the step does
    output_path : str or Path
        Path where the HTML report will be saved.
    include_plots : bool, default True
        Whether to include plots in the report.
    title : str, default "EEG Processing Report"
        Title for the report.
    verbose : bool or None, default None
        Control verbosity of output.

    Returns
    -------
    report_path : str
        Path to the generated HTML report.

    Examples
    --------
    >>> steps = [{'step_name': 'Filtering', 'parameters': {'low_freq': 0.1},
    ...           'execution_time': 2.3, 'description': 'Applied filter'}]
    >>> report_path = generate_processing_report(raw_original, raw_cleaned, steps, "report.html")

    See Also
    --------
    plot_raw_comparison : Create before/after comparison plots
    plot_ica_components : Visualize ICA components
    create_processing_summary : Create JSON processing summary
    """
    # Input validation
    if not isinstance(raw_original, mne.io.BaseRaw):
        raise TypeError(
            f"raw_original must be an MNE Raw object, got {type(raw_original).__name__}"
        )

    if not isinstance(raw_cleaned, mne.io.BaseRaw):
        raise TypeError(
            f"raw_cleaned must be an MNE Raw object, got {type(raw_cleaned).__name__}"
        )

    if not isinstance(processing_steps, list):
        raise TypeError("processing_steps must be a list of dictionaries")

    # Validate processing steps format
    required_keys = ["step_name", "parameters", "execution_time", "description"]
    for i, step in enumerate(processing_steps):
        if not isinstance(step, dict):
            raise ValueError(f"processing_steps[{i}] must be a dictionary")
        for key in required_keys:
            if key not in step:
                raise ValueError(f"processing_steps[{i}] missing required key: {key}")

    try:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Calculate processing statistics
        stats = _calculate_processing_stats(raw_original, raw_cleaned)

        # Generate HTML content
        html_content = _generate_html_report(
            raw_original=raw_original,
            raw_cleaned=raw_cleaned,
            processing_steps=processing_steps,
            stats=stats,
            title=title,
            include_plots=include_plots,
            output_dir=output_path.parent,
        )

        # Write HTML file
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(html_content)

        if verbose:
            print(f"Processing report saved to: {output_path}")

        return str(output_path)

    except Exception as e:
        raise RuntimeError(f"Failed to generate processing report: {str(e)}") from e



def _calculate_processing_stats(
    raw_original: mne.io.Raw, raw_cleaned: mne.io.Raw
) -> Dict:
    """Calculate statistics comparing original and cleaned data."""
    stats = {}

    # Basic info
    stats["original_duration"] = raw_original.times[-1] - raw_original.times[0]
    stats["cleaned_duration"] = raw_cleaned.times[-1] - raw_cleaned.times[0]
    stats["original_channels"] = len(raw_original.ch_names)
    stats["cleaned_channels"] = len(raw_cleaned.ch_names)
    stats["sampling_rate"] = raw_original.info["sfreq"]

    # Data quality metrics
    orig_data = raw_original.get_data()
    clean_data = raw_cleaned.get_data()

    stats["original_std_mean"] = float(np.mean(np.std(orig_data, axis=1)))
    stats["cleaned_std_mean"] = float(np.mean(np.std(clean_data, axis=1)))
    stats["noise_reduction"] = float(
        (stats["original_std_mean"] - stats["cleaned_std_mean"])
        / stats["original_std_mean"]
        * 100
    )

    # Annotation counts
    stats["original_annotations"] = len(raw_original.annotations)
    stats["cleaned_annotations"] = len(raw_cleaned.annotations)
    stats["bad_annotations"] = len(
        [ann for ann in raw_cleaned.annotations if ann["description"].startswith("BAD")]
    )

    return stats


def _generate_html_report(
    raw_original: mne.io.Raw,
    raw_cleaned: mne.io.Raw,
    processing_steps: List[Dict],
    stats: Dict,
    title: str,
    include_plots: bool,
    output_dir: Path,
) -> str:
    """Generate the HTML content for the processing report."""

    # HTML template
    html_template = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>{title}</title>
        <style>
            body {{
                font-family: Arial, sans-serif;
                line-height: 1.6;
                margin: 0;
                padding: 20px;
                background-color: #f4f4f4;
            }}
            .container {{
                max-width: 1200px;
                margin: 0 auto;
                background-color: white;
                padding: 20px;
                border-radius: 8px;
                box-shadow: 0 2px 5px rgba(0,0,0,0.1);
            }}
            h1, h2, h3 {{
                color: #333;
            }}
            .summary-grid {{
                display: grid;
                grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
                gap: 20px;
                margin: 20px 0;
            }}
            .stat-box {{
                background-color: #f8f9fa;
                border: 1px solid #dee2e6;
                border-radius: 5px;
                padding: 15px;
                text-align: center;
            }}
            .stat-value {{
                font-size: 2em;
                font-weight: bold;
                color: #007bff;
            }}
            .stat-label {{
                color: #6c757d;
                font-size: 0.9em;
            }}
            .step-table {{
                width: 100%;
                border-collapse: collapse;
                margin: 20px 0;
            }}
            .step-table th, .step-table td {{
                border: 1px solid #ddd;
                padding: 12px;
                text-align: left;
            }}
            .step-table th {{
                background-color: #f2f2f2;
                font-weight: bold;
            }}
            .step-table tr:nth-child(even) {{
                background-color: #f9f9f9;
            }}
            .timestamp {{
                color: #6c757d;
                font-size: 0.9em;
                text-align: right;
                margin-top: 20px;
            }}
            .plot-container {{
                text-align: center;
                margin: 20px 0;
            }}
            .plot-container img {{
                max-width: 100%;
                height: auto;
                border: 1px solid #ddd;
                border-radius: 5px;
            }}
        </style>
    </head>
    <body>
        <div class="container">
            <h1>{title}</h1>
            
            <h2>Executive Summary</h2>
            <div class="summary-grid">
                <div class="stat-box">
                    <div class="stat-value">{stats['original_channels']}</div>
                    <div class="stat-label">Original Channels</div>
                </div>
                <div class="stat-box">
                    <div class="stat-value">{stats['cleaned_channels']}</div>
                    <div class="stat-label">Final Channels</div>
                </div>
                <div class="stat-box">
                    <div class="stat-value">{stats['original_duration']:.1f}s</div>
                    <div class="stat-label">Duration</div>
                </div>
                <div class="stat-box">
                    <div class="stat-value">{stats['sampling_rate']:.0f} Hz</div>
                    <div class="stat-label">Sampling Rate</div>
                </div>
                <div class="stat-box">
                    <div class="stat-value">{stats['noise_reduction']:.1f}%</div>
                    <div class="stat-label">Noise Reduction</div>
                </div>
                <div class="stat-box">
                    <div class="stat-value">{stats['bad_annotations']}</div>
                    <div class="stat-label">Bad Segments</div>
                </div>
            </div>
            
            <h2>Processing Pipeline</h2>
            <table class="step-table">
                <thead>
                    <tr>
                        <th>Step</th>
                        <th>Description</th>
                        <th>Parameters</th>
                        <th>Execution Time</th>
                    </tr>
                </thead>
                <tbody>
    """

    # Add processing steps
    total_time = 0
    for step in processing_steps:
        params_str = ", ".join([f"{k}={v}" for k, v in step["parameters"].items()])
        html_template += f"""
                    <tr>
                        <td><strong>{step['step_name']}</strong></td>
                        <td>{step['description']}</td>
                        <td><code>{params_str}</code></td>
                        <td>{step['execution_time']:.2f}s</td>
                    </tr>
        """
        total_time += step["execution_time"]

    # Continue HTML template
    html_template += f"""
                </tbody>
            </table>
            <p><strong>Total Processing Time:</strong> {total_time:.2f} seconds</p>
            
            <h2>Data Quality Metrics</h2>
            <table class="step-table">
                <thead>
                    <tr>
                        <th>Metric</th>
                        <th>Original</th>
                        <th>Cleaned</th>
                        <th>Change</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>Mean Channel Standard Deviation</td>
                        <td>{stats['original_std_mean']:.2e}</td>
                        <td>{stats['cleaned_std_mean']:.2e}</td>
                        <td>{stats['noise_reduction']:.1f}% reduction</td>
                    </tr>
                    <tr>
                        <td>Number of Annotations</td>
                        <td>{stats['original_annotations']}</td>
                        <td>{stats['cleaned_annotations']}</td>
                        <td>+{stats['cleaned_annotations'] - stats['original_annotations']}</td>
                    </tr>
                    <tr>
                        <td>Bad Segment Annotations</td>
                        <td>0</td>
                        <td>{stats['bad_annotations']}</td>
                        <td>+{stats['bad_annotations']}</td>
                    </tr>
                </tbody>
            </table>
    """

    # Add plots if requested
    if include_plots:
        html_template += """
            <h2>Visualizations</h2>
            <div class="plot-container">
                <h3>Raw Data Comparison</h3>
                <p>Red: Original Data, Black: Cleaned Data</p>
                <!-- Plot will be added here if generated -->
            </div>
        """

    # Close HTML
    html_template += f"""
            <div class="timestamp">
                Report generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
            </div>
        </div>
    </body>
    </html>
    """

    return html_template



[docs]
def create_processing_summary(
    processing_steps: List[Dict], output_path: Optional[Union[str, Path]] = None
) -> Dict:
    """Create a JSON summary of processing steps.

    Parameters
    ----------
    processing_steps : list of dict
        List of processing steps with metadata.
    output_path : str, Path, or None, default None
        Path to save JSON summary. If None, returns dict only.

    Returns
    -------
    summary : dict
        Processing summary with statistics.
    """
    summary = {
        "total_steps": len(processing_steps),
        "total_time": sum(step["execution_time"] for step in processing_steps),
        "steps": processing_steps,
        "generated_at": datetime.now().isoformat(),
    }

    if output_path is not None:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w") as f:
            json.dump(summary, f, indent=2)

    return summary