Hi Adrian,
No worries! I'll share a Python code snippet below that converts GeoJSON files to a single NumPy array.
I hope this helps.
If you have further questions, feel free to reply.
Kind regards,
Daniel
import json
import os
import numpy as np
from shapely.geometry import shape
from rasterio.features import rasterize
# Tissue annotation class name -> integer label written into the class map.
# Label 0 is not used here: convert_geojson_to_npy starts from zero-filled
# maps, so 0 marks pixels that no mapped annotation covers.
CLASS_MAPPING_TISSUE_R = {
    "tissue_stroma": 1,
    "tissue_blood_vessel": 2,
    "tissue_tumor": 3,
    "tissue_epidermis": 4,
    "tissue_necrosis": 5,
}
# Three-class nuclei mapping (class name -> integer label). This is the
# mapping convert_geojson_to_npy falls back to when neither `tissue` nor
# `all_classes` is set (Track 1 in the usage notes at the end of the file).
CLASS_MAPPING_PUMA_R = {
    "nuclei_lymphocyte": 1,
    "nuclei_tumor": 2,
    "nuclei_other": 3,
}
# Ten-class nuclei mapping (class name -> integer label). Selected by
# convert_geojson_to_npy when `all_classes` is True and `tissue` is False
# (Track 2 in the usage notes at the end of the file).
CLASS_MAPPING_PUMA10_R = {
    "nuclei_endothelium": 1,
    "nuclei_plasma_cell": 2,
    "nuclei_stroma": 3,
    "nuclei_tumor": 4,
    "nuclei_histiocyte": 5,
    "nuclei_apoptosis": 6,
    "nuclei_epithelium": 7,
    "nuclei_melanophage": 8,
    "nuclei_neutrophil": 9,
    "nuclei_lymphocyte": 10,
}
def _rasterize_features(features, class_mapping, image_shape):
    """
    Rasterize the features of one GeoJSON file into an instance map and a class map.

    Parameters:
    - features (list): GeoJSON feature dicts; each must provide 'geometry' and
      'properties' -> 'classification' -> 'name'.
    - class_mapping (dict): Maps class names to integer class ids.
    - image_shape (tuple): Shape of the output masks (height, width).

    Returns:
    - (instance_map, class_map): a uint32 array holding 1-based instance ids and
      a uint8 array holding class ids, both of shape image_shape. Pixels not
      covered by any feature stay 0.
    """
    instance_map = np.zeros(image_shape, dtype=np.uint32)
    class_map = np.zeros(image_shape, dtype=np.uint8)

    for instance_id, feature in enumerate(features, start=1):
        geometry = shape(feature['geometry'])
        class_name = feature['properties']['classification']['name']

        # Burn the geometry as 1 on a 0 background, then turn it into a
        # boolean footprint once so both maps can reuse it.
        mask = rasterize(
            [(geometry, 1)],
            out_shape=image_shape,
            fill=0,
            default_value=1,
            dtype=np.uint8
        )
        footprint = mask == 1

        # Later features overwrite earlier ones where they overlap.
        instance_map[footprint] = instance_id
        # A feature whose class is not in the mapping still receives an
        # instance id, but leaves the class map untouched.
        if class_name in class_mapping:
            class_map[footprint] = class_mapping[class_name]

    return instance_map, class_map


def convert_geojson_to_npy(input_dir, output_file, all_classes=False, tissue=False, image_shape=(1024, 1024)):
    """
    Convert GeoJSON files in a directory to a single .npy file for labels.

    Files are processed in sorted file-name order, so index n along the first
    axis of the saved array corresponds to the n-th valid file name
    alphabetically. Files that are not valid JSON are reported and skipped.

    With tissue=True the saved array is the stacked class maps, shape
    (N, H, W), dtype uint8. Otherwise it has shape (N, H, W, 2), dtype uint32,
    where [..., 0] holds per-file instance ids (1-based) and [..., 1] holds
    class ids.

    Note: np.save appends '.npy' to output_file if it does not already end
    with it.

    Parameters:
    - input_dir (str): Directory containing GeoJSON files.
    - output_file (str): Path to the output .npy file.
    - all_classes (bool): If True, use CLASS_MAPPING_PUMA10_R for class mapping.
    - tissue (bool): If True, use CLASS_MAPPING_TISSUE_R for class mapping
      (takes precedence over all_classes).
    - image_shape (tuple): Shape of the output masks (height, width).

    Raises:
    - ValueError: If input_dir contains no .geojson files, or none of them
      could be parsed.
    """
    # os.listdir returns entries in arbitrary order; sort for a reproducible
    # file -> array-index correspondence.
    geojson_files = sorted(f for f in os.listdir(input_dir) if f.endswith('.geojson'))
    if not geojson_files:
        raise ValueError(f"No .geojson files found in directory: {input_dir}")

    # Mappings for class_name to class_id
    if tissue:
        class_mapping = CLASS_MAPPING_TISSUE_R
    elif all_classes:
        class_mapping = CLASS_MAPPING_PUMA10_R
    else:
        class_mapping = CLASS_MAPPING_PUMA_R

    instance_maps = []
    class_maps = []

    # Iterate over GeoJSON files to extract instance and class maps
    for file_name in geojson_files:
        file_path = os.path.join(input_dir, file_name)
        # GeoJSON is UTF-8 encoded; do not depend on the platform default.
        with open(file_path, 'r', encoding='utf-8') as geojson_file:
            try:
                data = json.load(geojson_file)
            except json.JSONDecodeError:
                print(f"Skipping invalid GeoJSON file: {file_name}")
                continue

        current_instance_map, current_class_map = _rasterize_features(
            data['features'], class_mapping, image_shape
        )
        instance_maps.append(current_instance_map)
        class_maps.append(current_class_map)

    if not instance_maps:
        # Every candidate file was skipped; fail with a clear message instead
        # of letting np.stack complain about an empty sequence.
        raise ValueError(f"No valid GeoJSON files could be read from directory: {input_dir}")

    # Convert lists of maps to numpy arrays
    instance_map_np = np.stack(instance_maps, axis=0)
    class_map_np = np.stack(class_maps, axis=0)

    if tissue:
        np.save(output_file, class_map_np)
        print(f"Tissue labels have been saved to {output_file}")
        return

    # Combine both maps into a single array with shape (N, H, W, 2)
    labels_np = np.stack(
        (instance_map_np, class_map_np.astype(np.uint32)),
        axis=-1,
    )

    # Save the numpy array to a file
    np.save(output_file, labels_np)
    print(f"Nuclei labels have been saved to {output_file}")
# Example usage
# For Track 1 use all_classes = False, for Track 2, use all_classes = True
# To convert tissue annotations instead of nuclei annotations, set tissue = True
if __name__ == "__main__":
    # Guarded so that importing this module does not start a conversion of
    # the placeholder path below; running the file as a script still does.
    convert_geojson_to_npy('path/to/input/dir', 'output_npy_file.npy', all_classes=True, tissue=False)