Skip to content

preprocessing

tit.reporting.generators.preprocessing

Preprocessing report generator for TI-Toolbox.

This module provides a report generator for the preprocessing pipeline, creating comprehensive HTML reports with processing steps, input/output data, and quality control information.

PreprocessingReportGenerator

PreprocessingReportGenerator(project_dir: str | Path, subject_id: str, session_id: str | None = None)

Bases: BaseReportGenerator

Report generator for preprocessing pipelines.

Creates comprehensive HTML reports including:

- Input data summary
- Processing steps with status
- Output data summary
- Software versions
- Quality control visualizations
- Methods boilerplate and references

Initialize the preprocessing report generator.

Parameters:

Name Type Description Default
project_dir str | Path

Path to the project directory

required
subject_id str

BIDS subject ID

required
session_id str | None

Optional session identifier

None
Source code in tit/reporting/generators/preprocessing.py
def __init__(
    self,
    project_dir: str | Path,
    subject_id: str,
    session_id: str | None = None,
):
    """
    Create a preprocessing report generator for one subject.

    Args:
        project_dir: Path to the project directory
        subject_id: BIDS subject ID
        session_id: Optional session identifier
    """
    super().__init__(
        project_dir=project_dir,
        subject_id=subject_id,
        session_id=session_id,
        report_type="preprocessing",
    )

    # Containers for preprocessing-specific report content; all start empty
    # and are filled via the add_* / set_* methods or scan_for_data().
    self.pipeline_config: dict[str, Any] = {}
    self.processing_steps: list[dict[str, Any]] = []
    self.input_data: dict[str, dict[str, Any]] = {}
    self.output_data: dict[str, dict[str, Any]] = {}
    self.qc_images: list[dict[str, Any]] = []

set_pipeline_config

set_pipeline_config(**config) -> None

Set the pipeline configuration.

Parameters:

Name Type Description Default
**config

Pipeline configuration parameters

{}
Source code in tit/reporting/generators/preprocessing.py
def set_pipeline_config(self, **config) -> None:
    """
    Store the pipeline configuration shown in the report.

    Args:
        **config: Arbitrary pipeline configuration key/value pairs
    """
    self.pipeline_config = dict(config)

add_input_data

add_input_data(data_type: str, file_paths: list[str], metadata: dict[str, Any] | None = None) -> None

Add input data information.

Parameters:

Name Type Description Default
data_type str

Type of data (T1w, T2w, DWI, etc.)

required
file_paths list[str]

List of input file paths

required
metadata dict[str, Any] | None

Optional metadata about the data

None
Source code in tit/reporting/generators/preprocessing.py
def add_input_data(
    self,
    data_type: str,
    file_paths: list[str],
    metadata: dict[str, Any] | None = None,
) -> None:
    """
    Record a set of input files under a data-type label.

    Args:
        data_type: Type of data (T1w, T2w, DWI, etc.)
        file_paths: List of input file paths
        metadata: Optional metadata about the data
    """
    record = {
        "file_paths": file_paths,
        "metadata": metadata or {},
        "n_files": len(file_paths),
    }
    self.input_data[data_type] = record

add_output_data

add_output_data(data_type: str, file_paths: list[str], metadata: dict[str, Any] | None = None) -> None

Add output data information.

Parameters:

Name Type Description Default
data_type str

Type of data (m2m, FreeSurfer, etc.)

required
file_paths list[str]

List of output file paths

required
metadata dict[str, Any] | None

Optional metadata about the data

None
Source code in tit/reporting/generators/preprocessing.py
def add_output_data(
    self,
    data_type: str,
    file_paths: list[str],
    metadata: dict[str, Any] | None = None,
) -> None:
    """
    Record a set of output files under a data-type label.

    Args:
        data_type: Type of data (m2m, FreeSurfer, etc.)
        file_paths: List of output file paths
        metadata: Optional metadata about the data
    """
    record = {
        "file_paths": file_paths,
        "metadata": metadata or {},
        "n_files": len(file_paths),
    }
    self.output_data[data_type] = record

add_processing_step

add_processing_step(step_name: str, description: str | None = None, parameters: dict[str, Any] | None = None, status: StatusType | str = PENDING, duration: float | None = None, output_files: list[str] | None = None, figures: list[dict[str, Any]] | None = None, error_message: str | None = None) -> None

Add a processing step.

Parameters:

Name Type Description Default
step_name str

Name of the processing step

required
description str | None

Step description

None
parameters dict[str, Any] | None

Step parameters

None
status StatusType | str

Step status

PENDING
duration float | None

Duration in seconds

None
output_files list[str] | None

Output file paths

None
figures list[dict[str, Any]] | None

QC figures

None
error_message str | None

Error message if failed

None
Source code in tit/reporting/generators/preprocessing.py
def add_processing_step(
    self,
    step_name: str,
    description: str | None = None,
    parameters: dict[str, Any] | None = None,
    status: StatusType | str = StatusType.PENDING,
    duration: float | None = None,
    output_files: list[str] | None = None,
    figures: list[dict[str, Any]] | None = None,
    error_message: str | None = None,
) -> None:
    """
    Record one pipeline step for the report.

    Args:
        step_name: Name of the processing step
        description: Step description
        parameters: Step parameters
        status: Step status
        duration: Duration in seconds
        output_files: Output file paths
        figures: QC figures
        error_message: Error message if failed
    """
    # Normalize enum statuses to their plain-string form for the report.
    status_value = status.value if isinstance(status, StatusType) else status

    step_record = {
        "name": step_name,
        "description": description,
        "parameters": parameters or {},
        "status": status_value,
        "duration": duration,
        "output_files": output_files or [],
        "figures": figures or [],
        "error_message": error_message,
    }
    self.processing_steps.append(step_record)

    # A failed step with a message is also surfaced in the report's error list.
    if status_value == "failed" and error_message:
        self.add_error(error_message, step=step_name)

add_qc_image

add_qc_image(title: str, base64_data: str, step_name: str | None = None, caption: str | None = None, image_type: str = 'qc') -> None

Add a quality control image.

Parameters:

Name Type Description Default
title str

Image title

required
base64_data str

Base64-encoded image data

required
step_name str | None

Associated processing step

None
caption str | None

Image caption

None
image_type str

Type of QC image

'qc'
Source code in tit/reporting/generators/preprocessing.py
def add_qc_image(
    self,
    title: str,
    base64_data: str,
    step_name: str | None = None,
    caption: str | None = None,
    image_type: str = "qc",
) -> None:
    """
    Record a quality-control image for the report.

    Args:
        title: Image title
        base64_data: Base64-encoded image data
        step_name: Associated processing step
        caption: Image caption
        image_type: Type of QC image
    """
    self.qc_images.append(
        dict(
            title=title,
            base64_data=base64_data,
            step_name=step_name,
            caption=caption,
            image_type=image_type,
        )
    )

scan_for_data

scan_for_data() -> None

Automatically scan directories for input and output data.

Only scans for outputs that correspond to the processing steps that were added to this report.

Source code in tit/reporting/generators/preprocessing.py
def scan_for_data(self) -> None:
    """
    Automatically scan directories for input and output data.

    Only scans for outputs that correspond to the processing steps
    that were added to this report.
    """
    # Determine which steps were run based on added processing steps
    step_names = {s["name"].lower() for s in self.processing_steps}

    # Input data - look for raw data
    rawdata_dir = self.project_dir / "rawdata" / f"sub-{self.subject_id}"
    if rawdata_dir.exists():
        # Look for anatomical data
        anat_dir = rawdata_dir / "anat"
        if anat_dir.exists():
            t1_files = list(anat_dir.glob("*T1w*.nii*"))
            if t1_files:
                self.add_input_data("T1w", [str(f) for f in t1_files])

            t2_files = list(anat_dir.glob("*T2w*.nii*"))
            if t2_files:
                self.add_input_data("T2w", [str(f) for f in t2_files])

        # Look for diffusion data (only if QSI steps were run)
        if any("qsi" in s or "dti" in s or "diffusion" in s for s in step_names):
            dwi_dir = rawdata_dir / "dwi"
            if dwi_dir.exists():
                dwi_files = list(dwi_dir.glob("*.nii*"))
                if dwi_files:
                    self.add_input_data("DWI", [str(f) for f in dwi_files])

    # Output data - look for derivatives based on steps that were run
    derivatives_dir = self.project_dir / "derivatives"

    # DICOM conversion outputs (NIfTI files)
    if any("dicom" in s for s in step_names):
        nifti_dir = self.project_dir / "rawdata" / f"sub-{self.subject_id}"
        if nifti_dir.exists():
            nifti_files = list(nifti_dir.rglob("*.nii*"))
            if nifti_files:
                self.add_output_data("NIfTI (converted)", [str(nifti_dir)])

    # FreeSurfer outputs - only if recon step was run
    if any("freesurfer" in s or "recon" in s for s in step_names):
        fs_dir = derivatives_dir / "freesurfer" / f"sub-{self.subject_id}"
        if fs_dir.exists():
            self.add_output_data("FreeSurfer", [str(fs_dir)])

    # SimNIBS m2m outputs - only if charm/m2m step was run
    if any("simnibs" in s or "charm" in s or "m2m" in s for s in step_names):
        # Try multiple possible paths
        m2m_paths = [
            derivatives_dir
            / "SimNIBS"
            / f"sub-{self.subject_id}"
            / f"m2m_{self.subject_id}",
            derivatives_dir
            / "simnibs"
            / f"sub-{self.subject_id}"
            / f"m2m_{self.subject_id}",
            derivatives_dir / "simnibs" / f"m2m_sub-{self.subject_id}",
        ]
        for m2m_dir in m2m_paths:
            if m2m_dir.exists():
                self.add_output_data("SimNIBS m2m", [str(m2m_dir)])
                break

    # Tissue analysis outputs
    if any("tissue" in s for s in step_names):
        tissue_dir = derivatives_dir / "tissue_analysis" / f"sub-{self.subject_id}"
        if tissue_dir.exists():
            self.add_output_data("Tissue Analysis", [str(tissue_dir)])

    # QSIPrep outputs - only if qsiprep step was run
    if any("qsiprep" in s for s in step_names):
        qsiprep_dir = derivatives_dir / "qsiprep" / f"sub-{self.subject_id}"
        if qsiprep_dir.exists():
            self.add_output_data("QSIPrep", [str(qsiprep_dir)])

    # QSIRecon outputs - only if qsirecon step was run
    if any("qsirecon" in s for s in step_names):
        qsirecon_dir = derivatives_dir / "qsirecon" / f"sub-{self.subject_id}"
        if qsirecon_dir.exists():
            self.add_output_data("QSIRecon", [str(qsirecon_dir)])

    # DTI outputs - only if DTI step was run
    if any("dti" in s for s in step_names):
        dti_dir = derivatives_dir / "dti" / f"sub-{self.subject_id}"
        if dti_dir.exists():
            self.add_output_data("DTI Tensors", [str(dti_dir)])

create_preprocessing_report

create_preprocessing_report(project_dir: str | Path, subject_id: str, processing_steps: list[dict[str, Any]] | None = None, output_path: str | Path | None = None, auto_scan: bool = True) -> Path

Convenience function to create a preprocessing report.

Parameters:

Name Type Description Default
project_dir str | Path

Path to project directory

required
subject_id str

BIDS subject ID

required
processing_steps list[dict[str, Any]] | None

List of processing step dictionaries

None
output_path str | Path | None

Optional custom output path

None
auto_scan bool

Whether to auto-scan for data

True

Returns:

Type Description
Path

Path to the generated report

Source code in tit/reporting/generators/preprocessing.py
def create_preprocessing_report(
    project_dir: str | Path,
    subject_id: str,
    processing_steps: list[dict[str, Any]] | None = None,
    output_path: str | Path | None = None,
    auto_scan: bool = True,
    session_id: str | None = None,
) -> Path:
    """
    Convenience function to create a preprocessing report.

    Args:
        project_dir: Path to project directory
        subject_id: BIDS subject ID
        processing_steps: List of processing step dictionaries
        output_path: Optional custom output path
        auto_scan: Whether to auto-scan for data
        session_id: Optional session identifier forwarded to the generator

    Returns:
        Path to the generated report
    """
    generator = PreprocessingReportGenerator(
        project_dir=project_dir,
        subject_id=subject_id,
        session_id=session_id,
    )

    # Register the steps BEFORE scanning: scan_for_data() decides which
    # derivative outputs to look for based on the registered step names,
    # so scanning first would see an empty step set and skip most outputs.
    if processing_steps:
        for step in processing_steps:
            generator.add_processing_step(**step)

    if auto_scan:
        generator.scan_for_data()

    return generator.generate(output_path)