# -*- coding: utf-8 -*-
"""A StageData object acts like a data container that manages results generated by stages and read by subsequent
stages.
"""
from uval.stages.stage import uval_stage # type: ignore
from uval.stages.stage_data import Hdf5FilesData, SupportedDatasetSpecificationData # type: ignore
from uval.utils.log import logger # type: ignore
@uval_stage
def create_supported_dataset_from_intersection(gt_files: Hdf5FilesData, det_files: Hdf5FilesData):
    """Join ground truth and detection files on their shared volume ids.

    For every volume id that occurs in both ``gt_files`` and ``det_files``, one
    row per ground truth label of that volume is appended to the result table.

    Args:
        gt_files: Ground truth files. ``gt_files.table`` must provide a
            ``"volume_id"`` column and must be addressable by volume id through
            ``.loc`` (assumes the table is indexed by volume id -- TODO confirm).
            Each addressed entry must expose ``["hdf5_meta"]["groundtruth"]`` as
            a mapping from label id to a mapping containing ``"target_id"`` and
            ``"class_name"``.
        det_files: Detection files. ``det_files.table`` must provide a
            ``"volume_id"`` column and be addressable by volume id through ``.loc``.

    Returns:
        A ``SupportedDatasetSpecificationData``. When the inputs share no volume
        id, an error is logged and a freshly constructed instance is returned
        without any rows being added.
    """
    # Let GT be the set of ground truth volume ids and DET the set of detection volume ids.
    # Determine GT & DET, as well as GT - DET and DET - GT for the statistics below.
    gt_ids = set(gt_files.table["volume_id"])
    det_ids = set(det_files.table["volume_id"])
    set_intersection = gt_ids & det_ids
    set_gt_minus_det = gt_ids - det_ids
    set_det_minus_gt = det_ids - gt_ids

    if not set_intersection:
        logger.error("Did not find any matching files between groundtruth and detections")
        return SupportedDatasetSpecificationData()  # Return empty set

    # This is important feedback for the user to see if this matches their expectations
    logger.info(
        f"Dataset intersection statistics:\n"
        f" GT & DET = {len(set_intersection)}\n"
        f" GT - DET = {len(set_gt_minus_det)}\n"
        f" DET - GT = {len(set_det_minus_gt)}\n"
    )

    result = SupportedDatasetSpecificationData()

    # Now create the supported dataset from the intersection set.
    # Sorting makes the row order independent of set iteration order.
    idx = 0  # Running row label used when appending to result.table
    for vol_id in sorted(set_intersection):
        cur_gt_file = gt_files.table.loc[vol_id]
        cur_det_file = det_files.table.loc[vol_id]

        # Now iterate over every ground truth label
        for gt_label, gt in cur_gt_file["hdf5_meta"]["groundtruth"].items():
            result.table.loc[idx] = {
                "label_id": gt_label,
                "volume_id": vol_id,
                "is_negative": False,  # For now, no SOC data used here
                "target_id": gt["target_id"],
                "subset": None,  # Since this is not based on a data split, we don't have a subset
                "class_name": gt["class_name"],
                "hdf5_detection": cur_det_file,
                "hdf5_groundtruth": cur_gt_file,
            }
            idx += 1

    return result
@uval_stage
def support_dataset_with_file_paths(gt_files: Hdf5FilesData, det_files: Hdf5FilesData, soc_files: Hdf5FilesData):
    """Combine ground truth, detection and SOC files into a supported dataset.

    Two groups of rows are appended to the result table:

    * one row with ``is_negative=False`` for every volume id that occurs in both
      ``gt_files`` and ``det_files``;
    * one row with ``is_negative=True`` and no ground truth for every volume id
      in ``soc_files``.

    Args:
        gt_files: Ground truth files. ``gt_files.table`` must provide a
            ``"volume_id"`` column and be addressable by volume id through
            ``.loc`` (assumes the table is indexed by volume id -- TODO confirm);
            each addressed entry must expose ``["hdf5_meta"]["groundtruth"]``.
        det_files: Detection files with the same table requirements; each
            addressed entry must expose ``["hdf5_meta"]["detections"]``.
        soc_files: Files used as negative samples, with the same table
            requirements; each addressed entry must expose
            ``["hdf5_meta"]["detections"]``.

    Returns:
        A ``SupportedDatasetSpecificationData``. When none of the three inputs
        contains any volume id, an error is logged and a freshly constructed
        instance is returned without any rows being added.
    """
    # Let GT be the set of ground truth volume ids, DET the set of detection volume ids
    # and SOC the set of volume ids of the SOC files.
    gt_ids = set(gt_files.table["volume_id"])
    det_ids = set(det_files.table["volume_id"])
    soc_ids = set(soc_files.table["volume_id"])
    set_union = gt_ids | det_ids | soc_ids
    set_intersection = gt_ids & det_ids
    set_gt_minus_det = gt_ids - det_ids
    set_det_minus_gt = det_ids - gt_ids

    # NOTE(review): this branch is taken when no volume id was found in any input, so the
    # message wording ("matching files") is narrower than the condition -- confirm intent.
    if not set_union:
        logger.error("Did not find any matching files between groundtruth and detections")
        return SupportedDatasetSpecificationData()  # Return empty set

    # This is important feedback for the user to see if this matches their expectations
    logger.info(
        f"Dataset union statistics:\n"
        f" GT & DET = {len(set_intersection)}\n"
        f" GT - DET = {len(set_gt_minus_det)}\n"
        f" DET - GT = {len(set_det_minus_gt)}\n"
        f" DET + GT + SOC = {len(set_union)}\n"
    )

    result = SupportedDatasetSpecificationData()
    idx = 0  # Running row label shared by both loops so that rows are never overwritten

    # Positive samples: volumes that have both ground truth and detections.
    # Sorting makes the row order independent of set iteration order.
    for vol_id in sorted(set_intersection):
        result.table.loc[idx] = {
            "volume_id": vol_id,
            "is_negative": False,
            "hdf5_detection": det_files.table.loc[vol_id]["hdf5_meta"]["detections"],
            "hdf5_groundtruth": gt_files.table.loc[vol_id]["hdf5_meta"]["groundtruth"],
        }
        idx += 1

    # Negative samples: every SOC volume, which carries detections but no ground truth.
    # NOTE(review): a volume id present in both the GT/DET intersection and the SOC files
    # produces two rows (one positive, one negative) -- confirm this is intended.
    for vol_id in sorted(soc_ids):
        result.table.loc[idx] = {
            "volume_id": vol_id,
            "is_negative": True,
            "hdf5_detection": soc_files.table.loc[vol_id]["hdf5_meta"]["detections"],
            "hdf5_groundtruth": None,
        }
        idx += 1

    return result