Source code for kartothek.io_components.cube.cleanup

from functools import reduce

from kartothek.core.cube.constants import KTK_CUBE_UUID_SEPERATOR
from kartothek.utils.ktk_adapters import get_dataset_keys

__all__ = ("get_keys_to_clean",)


[docs]def get_keys_to_clean(cube_uuid_prefix, datasets, store): """ Get the keys that are present in the store but can be deleted. Parameters ---------- store: simplekv.KeyValueStore KV store. datasets: Dict[str, kartothek.core.dataset.DatasetMetadata] Datasets to scan for keys. Returns ------- keys: Set[str] Keys to delete. """ keys_should = reduce( set.union, (get_dataset_keys(ds) for ds in datasets.values()), set() ) keys_present = { k for k in store.iter_keys(cube_uuid_prefix + KTK_CUBE_UUID_SEPERATOR) } return keys_present - keys_should