# Source code for wsinfer.write_geojson

"""Convert CSVs of model outputs to GeoJSON files.

GeoJSON files can be loaded into whole slide image viewers like QuPath.
"""

from __future__ import annotations

import json
import uuid
from functools import partial
from pathlib import Path

import pandas as pd
from tqdm.contrib.concurrent import process_map


def _box_to_polygon(
    *, minx: int, miny: int, width: int, height: int
) -> list[tuple[int, int]]:
    """Get coordinates of a box polygon."""
    maxx = minx + width
    maxy = miny + height
    return [(maxx, miny), (maxx, maxy), (minx, maxy), (minx, miny), (maxx, miny)]


def _row_to_geojson(row: pd.Series, prob_cols: list[str]) -> dict:
    """Build the GeoJSON feature for one tile.

    The tile's box is read from the "minx", "miny", "width" and "height"
    entries of ``row``; the entries named by ``prob_cols`` become the
    feature's measurements. A new random UUID is generated for the feature
    id on every call.
    """
    ring = _box_to_polygon(
        minx=row["minx"],
        miny=row["miny"],
        width=row["width"],
        height=row["height"],
    )
    geometry = {"type": "Polygon", "coordinates": [ring]}

    # Measurements are emitted as a mapping of probability column name to
    # the value in this row. QuPath's measurement documentation:
    # https://qupath.github.io/javadoc/docs/qupath/lib/measurements/MeasurementList.html
    #
    # No "classification" entry ({"name": str, optionally "color": int}) is
    # written, because a single class per tile is not enforced.
    # https://qupath.github.io/javadoc/docs/qupath/lib/objects/classes/PathClass.html
    properties = {
        "isLocked": True,
        "measurements": row[prob_cols].to_dict(),
        "objectType": "tile",
    }

    return {
        "type": "Feature",
        "id": str(uuid.uuid4()),
        "geometry": geometry,
        "properties": properties,
    }


def _dataframe_to_geojson(df: pd.DataFrame, prob_cols: list[str]) -> dict:
    """Convert a dataframe of tiles to GeoJSON format."""
    features = df.apply(_row_to_geojson, axis=1, prob_cols=prob_cols)
    return {
        "type": "FeatureCollection",
        "features": features.tolist(),
    }


def make_geojson(csv: Path, results_dir: Path) -> None:
    """Convert one CSV of model outputs to a GeoJSON file.

    The CSV is read with pandas and written to
    ``results_dir / "model-outputs-geojson" / "<csv stem>.geojson"``. That
    output directory is not created here, so it has to exist already.

    Parameters
    ----------
    csv : Path
        CSV file to convert. Columns whose names start with ``prob_`` are
        exported as per-tile measurements.
    results_dir : Path
        Directory that contains the ``model-outputs-geojson`` directory.

    Raises
    ------
    KeyError
        If the CSV has no column whose name starts with ``prob_``.
    """
    filename = csv.stem
    df = pd.read_csv(csv)
    prob_cols = [col for col in df.columns.tolist() if col.startswith("prob_")]
    if not prob_cols:
        raise KeyError("Did not find any columns with prob_ prefix.")
    geojson = _dataframe_to_geojson(df, prob_cols)
    with open(results_dir / "model-outputs-geojson" / f"{filename}.geojson", "w") as f:
        json.dump(geojson, f)


def write_geojsons(csvs: list[Path], results_dir: Path, num_workers: int) -> None:
    """Convert CSVs of model outputs to GeoJSON files, skipping finished ones.

    GeoJSON files are written to ``results_dir / "model-outputs-geojson"``.
    If that directory already exists, any CSV whose stem matches an existing
    ``*.geojson`` file in it is skipped; otherwise the directory is created
    and every CSV is converted.

    Parameters
    ----------
    csvs : list[Path]
        CSV files to convert.
    results_dir : Path
        Results directory; it must contain a ``model-outputs-csv`` directory.
    num_workers : int
        Passed to ``process_map`` as ``max_workers``.

    Raises
    ------
    FileExistsError
        If ``results_dir`` or its ``model-outputs-csv`` directory does not
        exist. (The exception type is kept as-is so that existing callers
        that catch it continue to work.)
    """
    output = results_dir / "model-outputs-geojson"

    if not results_dir.exists():
        raise FileExistsError(f"results_dir does not exist: {results_dir}")

    if not (results_dir / "model-outputs-csv").exists():
        # Patches without model outputs means inference has not been run yet.
        if (results_dir / "patches").exists():
            raise FileExistsError(
                "Model outputs have not been generated yet. Please run model inference."
            )
        raise FileExistsError(
            "Expected results_dir to contain a 'model-outputs-csv' "
            "directory but it does not. "
            "Please provide the path to the directory "
            "that contains model-outputs, masks, and patches."
        )

    if output.exists():
        # Only convert CSVs that do not have a GeoJSON of the same stem yet.
        done = {geojson.stem for geojson in output.glob("*.geojson")}
        csvs = [path for path in csvs if path.stem not in done]
    else:
        # Nothing has been converted yet: create the output directory and
        # convert every CSV.
        output.mkdir(parents=True, exist_ok=True)

    func = partial(make_geojson, results_dir=results_dir)
    process_map(func, csvs, max_workers=num_workers, chunksize=1)