#!/usr/bin/env python3
"""
Check .nc files using xarray (parallel) and write failing paths to a text file.

Example:
    python check_netcdf_click.py /data --out failed.txt --workers 8
"""

from __future__ import annotations
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path
import os
import click
import xarray as xr


def _try_open_xarray(path: Path) -> tuple[str, bool, str]:
    """
    Probe a single NetCDF file by opening it with xarray and closing it again.

    Intended to run inside a worker process, so the result is a tuple of
    plain values: ``(path, ok, error_message)``. ``error_message`` is the
    empty string on success and ``"<ExceptionType>: <message>"`` otherwise.
    """
    print("OPENING :: ", path)
    try:
        # First attempt pins the netcdf4 engine; on any failure, retry once
        # and let xarray pick its default engine.
        try:
            dataset = xr.open_dataset(path, engine="netcdf4")
        except Exception:
            dataset = xr.open_dataset(path)
        dataset.close()
    except Exception as exc:
        reason = f"{type(exc).__name__}: {exc}"
        return (str(path), False, reason)
    else:
        return (str(path), True, "")


def _find_nc_files(root: Path) -> list[Path]:
    """
    Return every regular ``*.nc`` file below *root* as a resolved path.

    Paths are resolved (symlinks followed, made absolute), de-duplicated and
    sorted. De-duplication matters because resolving can map several
    directory entries (e.g. a symlink and its target) onto the same file,
    which would otherwise be checked and reported more than once. Sorting
    makes the result independent of filesystem traversal order.

    Directories whose name happens to end in ``.nc`` are skipped.
    """
    unique = {p.resolve() for p in root.rglob("*.nc") if p.is_file()}
    return sorted(unique)


@click.command(context_settings=dict(help_option_names=["-h", "--help"]))
@click.argument("root", type=click.Path(exists=True, file_okay=False, path_type=Path))
@click.option("--out", "out_path",
              type=click.Path(dir_okay=False, writable=True, path_type=Path),
              default=Path("failed_files.txt"),
              show_default=True,
              help="Output text file that will contain one failing path per line.")
@click.option("--workers", type=int, default=0, show_default=True,
              help="Number of worker processes (0 -> use all available cores).")
def cli(root: Path, out_path: Path, workers: int) -> None:
    """Recursively scan ROOT for .nc files, open with xarray, and log failures."""
    # NOTE: the docstring above is shown verbatim by ``--help``; implementation
    # notes therefore live in comments.
    #
    # root:     existing directory to scan recursively.
    # out_path: text file that receives one failing path per line (overwritten).
    # workers:  process count; 0 means "all cores", negative values mean 1.
    files = _find_nc_files(root)
    if not files:
        click.echo("No .nc files found. Creating empty output file.")
        out_path.write_text("")
        return

    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to a single worker so the message below stays meaningful.
    max_workers = max(1, workers) if workers else (os.cpu_count() or 1)
    click.echo(f"Found {len(files)} .nc files. Using {max_workers} workers.")
    failed: list[tuple[str, str]] = []
    unchecked = 0  # files whose check never produced a result

    with click.progressbar(length=len(files), label="Checking files") as bar:
        with ProcessPoolExecutor(max_workers=max_workers) as ex:
            futures = {ex.submit(_try_open_xarray, f): f for f in files}
            for fut in as_completed(futures):
                try:
                    path_str, ok, err = fut.result()
                except Exception as e:
                    # The worker itself failed (e.g. the process died and the
                    # pool is broken). Record the file instead of aborting the
                    # whole run and losing the results gathered so far.
                    path_str = str(futures[fut])
                    ok = False
                    err = f"{type(e).__name__}: {e}"
                    unchecked += 1
                if not ok:
                    failed.append((path_str, err))
                bar.update(1)

    # Completion order is arbitrary; sort so the output file is reproducible.
    failed.sort()

    # Write only the paths of failed files
    with out_path.open("w", encoding="utf-8") as f:
        f.writelines(path_str + "\n" for path_str, _err in failed)

    if unchecked:
        click.echo(
            f"Warning: {unchecked} file(s) could not be checked because a worker "
            "process failed; they are listed as failed.",
            err=True,
        )
    click.echo(f"Done. OK: {len(files) - len(failed)}  Failed: {len(failed)}")
    click.echo(f"Failed file list written to: {out_path.resolve()}")


# Run the click command only when executed as a script, not when imported
# (worker processes may re-import this module).
if __name__ == "__main__":
    cli()

