Source code for kartothek.io.testing.delete

from .utils import create_dataset


[docs]def test_delete_dataset(store_factory, metadata_version, bound_delete_dataset): """ Ensure that a dataset can be deleted """ create_dataset("dataset", store_factory, metadata_version) store = store_factory() assert len(list(store.keys())) > 0 bound_delete_dataset("dataset", store_factory) assert len(list(store.keys())) == 0
[docs]def test_delete_single_dataset(store_factory, metadata_version, bound_delete_dataset): """ Ensure that only the specified dataset is deleted """ create_dataset("dataset", store_factory, metadata_version) create_dataset("another_dataset", store_factory, metadata_version) store = store_factory() amount_of_keys = len(list(store.keys())) assert len(list(store.keys())) > 0 bound_delete_dataset("dataset", store_factory) assert len(list(store.keys())) == amount_of_keys / 2, store.keys()
[docs]def test_delete_only_dataset(store_factory, metadata_version, bound_delete_dataset): """ Ensure that files including the UUID but not starting with it are not deleted """ create_dataset("UUID", store_factory, metadata_version) store = store_factory() store.put(key="prefixUUID", data=b"") bound_delete_dataset("UUID", store_factory) assert "prefixUUID" in store.keys()
[docs]def test_delete_missing_dataset(store_factory, store_factory2, bound_delete_dataset): """ Ensure that a dataset can be deleted even though some keys are already removed. """ metadata_version = 4 create_dataset("dataset", store_factory, metadata_version) store = store_factory() keys = sorted(store.keys()) assert len(keys) > 0 store2 = store_factory2() for missing in keys: if missing == "dataset.by-dataset-metadata.json": continue for k in keys: if k != missing: store2.put(k, store.get(k)) bound_delete_dataset("dataset", store_factory2) assert len(list(store2.keys())) == 0
[docs]def test_delete_dataset_unreferenced_files( store_factory, metadata_version, bound_delete_dataset ): """ Ensure that unreferenced files of a dataset are also removed when a dataset is deleted """ uuid = "dataset" create_dataset(uuid, store_factory, metadata_version) store = store_factory() store.put(f"{uuid}/table/trash.parquet", b"trash") assert len(list(store.keys())) > 0 bound_delete_dataset(uuid, store_factory) assert len(list(store.keys())) == 0