# Source code for wsinfer.write_geojson

"""Convert CSVs of model outputs to GeoJSON files.

GeoJSON files can be loaded into whole slide image viewers like QuPath.
"""

from __future__ import annotations

import json
import uuid
from functools import partial
from pathlib import Path

import pandas as pd
from tqdm.contrib.concurrent import process_map


def _box_to_polygon(
    *, minx: int, miny: int, width: int, height: int
) -> list[tuple[int, int]]:
    """Get coordinates of a box polygon."""
    maxx = minx + width
    maxy = miny + height
    return [(maxx, miny), (maxx, maxy), (minx, maxy), (minx, miny), (maxx, miny)]


def _row_to_geojson(row: pd.Series, prob_cols: list[str]) -> dict:
    """Build the GeoJSON feature that describes a single tile.

    Args:
        row: One tile. Must provide ``minx``, ``miny``, ``width`` and ``height``
            as well as every label listed in ``prob_cols``.
        prob_cols: Labels of the entries in ``row`` to store as measurements.

    Returns:
        A GeoJSON ``Feature`` dict with a random UUID as its ``id``, a box
        ``Polygon`` geometry, and the selected values under
        ``properties["measurements"]``.
    """
    ring = _box_to_polygon(
        minx=row["minx"],
        miny=row["miny"],
        width=row["width"],
        height=row["height"],
    )
    # measurements maps each label in prob_cols to the value stored in this row.
    # https://qupath.github.io/javadoc/docs/qupath/lib/measurements/MeasurementList.html
    measurements = row[prob_cols].to_dict()

    geometry = {"type": "Polygon", "coordinates": [ring]}
    # No "classification" entry is emitted because a single class per tile is
    # not enforced.
    # https://qupath.github.io/javadoc/docs/qupath/lib/objects/classes/PathClass.html
    properties = {
        "isLocked": True,
        "measurements": measurements,
        "objectType": "tile",
    }
    return {
        "type": "Feature",
        "id": str(uuid.uuid4()),
        "geometry": geometry,
        "properties": properties,
    }


def _dataframe_to_geojson(df: pd.DataFrame, prob_cols: list[str]) -> dict:
    """Turn a dataframe of tiles into a GeoJSON feature collection.

    Args:
        df: Dataframe with one tile per row.
        prob_cols: Column names forwarded to ``_row_to_geojson`` for every row.

    Returns:
        A ``FeatureCollection`` dict whose ``features`` list holds one feature
        per row of ``df``.
    """
    # axis=1 hands each row (not each column) to the converter.
    per_tile = df.apply(lambda row: _row_to_geojson(row, prob_cols), axis=1)
    collection = {"type": "FeatureCollection"}
    collection["features"] = per_tile.tolist()
    return collection



# [docs]
def make_geojson(csv: Path, results_dir: Path) -> None:
    """Convert one CSV of model outputs into a GeoJSON file.

    The result is written to
    ``results_dir / "model-outputs-geojson" / f"{csv.stem}.geojson"``.

    Args:
        csv: Path to the CSV file to convert.
        results_dir: Directory under which the ``model-outputs-geojson``
            directory lives. That directory is created if it is missing.

    Raises:
        KeyError: If the CSV has no column whose name starts with ``prob_``.
    """
    df = pd.read_csv(csv)
    prob_cols = [col for col in df.columns.tolist() if col.startswith("prob_")]
    if not prob_cols:
        raise KeyError("Did not find any columns with prob_ prefix.")
    geojson = _dataframe_to_geojson(df, prob_cols)

    # Create the output directory here so the function also works when it is
    # called directly. This runs only after validation, so a rejected CSV
    # leaves nothing behind on disk.
    output_dir = results_dir / "model-outputs-geojson"
    output_dir.mkdir(parents=True, exist_ok=True)
    with open(output_dir / f"{csv.stem}.geojson", "w", encoding="utf-8") as f:
        json.dump(geojson, f)




# [docs]
def write_geojsons(csvs: list[Path], results_dir: Path, num_workers: int) -> None:
    """Convert CSVs of model outputs to GeoJSON files using worker processes.

    GeoJSON files are written to ``results_dir / "model-outputs-geojson"``.
    When that directory already exists, any CSV whose stem matches an existing
    ``.geojson`` file in it is skipped.

    Args:
        csvs: Paths of the CSV files to convert.
        results_dir: Existing directory that contains a ``model-outputs-csv``
            directory.
        num_workers: Passed to ``process_map`` as ``max_workers``.

    Raises:
        FileExistsError: If ``results_dir`` or its ``model-outputs-csv``
            directory does not exist.
    """
    output = results_dir / "model-outputs-geojson"
    csv_dir = results_dir / "model-outputs-csv"

    # NOTE: FileExistsError (rather than FileNotFoundError) is kept on purpose
    # so that callers already catching it keep working.
    if not results_dir.exists():
        raise FileExistsError(f"results_dir does not exist: {results_dir}")
    if not csv_dir.exists():
        # Patches without CSVs means inference has not been run yet.
        if (results_dir / "patches").exists():
            raise FileExistsError(
                "Model outputs have not been generated yet. Please run model inference."
            )
        raise FileExistsError(
            "Expected results_dir to contain a 'model-outputs-csv' "
            "directory but it does not. "
            "Please provide the path to the directory "
            "that contains model-outputs, masks, and patches."
        )

    if output.exists():
        # Only convert CSVs that do not have a GeoJSON counterpart yet. A set
        # keeps each membership check constant-time.
        done = {path.stem for path in output.glob("*.geojson")}
        csvs = [path for path in csvs if path.stem not in done]
    else:
        # Nothing has been converted yet: create the directory and convert all.
        output.mkdir(parents=True, exist_ok=True)

    func = partial(make_geojson, results_dir=results_dir)
    process_map(func, csvs, max_workers=num_workers, chunksize=1)