# -*- coding: utf-8 -*-
"""A StageData object acts like a data container that manages results generated by stages and read by subsequent
stages.
Depending on the stage, each class that inherits from StageData will contain different things, documented below.
Many of them will make use of pandas DataFrames to keep the data in a table-like shape.
"""
from abc import ABC
from typing import List
from pandas import DataFrame # type: ignore
from uval.utils.log import logger
class StageData(ABC):
    """Abstract base class for all data that is passed around between stages.

    The class is intentionally empty for now. It exists as a common ancestor
    so that shared behaviour (for example storing and restoring the data when
    caching is used) can be added later in a single place.
    """
class TableStageData(StageData):
    """Base class for all stage data that is table-like and handled with pandas.

    Subclasses declare their column layout by overriding ``frame_columns``;
    a fresh instance starts with an empty ``DataFrame`` that has exactly
    those columns.
    """

    # Column names of the empty table created in ``__init__``.
    frame_columns = []  # type: List[str]

    def __init__(self):
        """Create an empty table whose columns are ``frame_columns``."""
        self.table = DataFrame(columns=self.frame_columns)

    def to_html(self, file_path: str) -> None:
        """Write the table as HTML to ``file_path``."""
        self.table.to_html(file_path)

    def to_csv(self, file_path: str) -> None:
        """Write the table as CSV to ``file_path``."""
        self.table.to_csv(file_path)

    def from_dict_as_rows(self, data_dict: dict, check_data_validity: bool = False) -> bool:
        """Replace the table with one built from ``data_dict``, one row per key.

        Args:
            data_dict: Mapping whose keys become the row index and whose
                values hold the data of the corresponding row.
            check_data_validity: When true, compare the shape of the new
                table against ``data_dict`` after the import.

        Returns:
            ``True`` if validation is disabled or passes. ``False`` if the
            number of rows differs from ``len(data_dict)``, or if the length
            of the first entry differs from the number of columns. Note that
            ``self.table`` has already been replaced when ``False`` is
            returned.
        """
        # orient="index" turns every dictionary key into one row.
        self.table = DataFrame.from_dict(data_dict, orient="index")

        if not check_data_validity:
            return True

        row_count, column_count = self.table.shape

        # Every key of the dictionary must have produced exactly one row.
        if len(data_dict) != row_count:
            logger.error("Unequal number of rows between the DataFrame and Dict after import!")
            return False

        # Only the first entry is compared against the column count.
        if data_dict:
            first_row = next(iter(data_dict.values()))
            if len(first_row) != column_count:
                logger.error("Unequal number of columns between the DataFrame and Dict after import!")
                return False

        return True
class DatasetSpecificationData(TableStageData):
    """Contains a list of targets to be detected.

    Every entry / row refers to a single ground truth target, so multiple
    rows can refer to the same volume_id if there are multiple targets in
    one volume. For more information please see the Data-split YAML format
    and HDF5 format in the uval docs.
    """

    frame_columns = ["volume_id", "label_id", "is_negative", "subset", "class_name"]

    def __init__(self):
        """Create an empty table with the columns listed in ``frame_columns``."""
        super().__init__()
class SupportedDatasetSpecificationData(DatasetSpecificationData):
    """Similar to DatasetSpecificationData, but every entry is additionally
    supported by HDF5 files for ground truth and detections.

    The column layout is the parent's ``frame_columns`` extended by
    ``hdf5_detection``, ``hdf5_groundtruth`` and ``target_id``.
    """

    frame_columns = DatasetSpecificationData.frame_columns + ["hdf5_detection", "hdf5_groundtruth", "target_id"]

    def __init__(self):
        """Create an empty table with the columns listed in ``frame_columns``."""
        super().__init__()
class Hdf5FilesData(TableStageData):
    """Keeps a list of files that are available on disk.

    The files can be identified by bag id. For each bag id, contains the
    file path and the file_meta and volume_meta information. Also stores
    detections and groundtruth and their scores, box data etc., but not the
    masks or projections.
    """

    frame_columns = ["volume_id", "file_path", "file_stat", "hdf5_meta"]

    def __init__(self):
        """Create an empty table with the columns listed in ``frame_columns``."""
        super().__init__()
class BagLevelAlarmsData(TableStageData):
    """For every bag id, stores the number of alarms on this bag."""

    frame_columns = ["volume_id", "num_alarms"]

    def __init__(self):
        """Create an empty table with the columns listed in ``frame_columns``."""
        super().__init__()
class DetectionMetricsData(TableStageData):
    """For every bag id and every GT object, stores the matched detection
    alarm (index) and its overlap.

    In addition, for every bag, stores the detection alarm indices that do
    not have a matching ground truth.
    """