diff --git a/.gitignore b/.gitignore index 8557fda0e53f27ad72a90c2036f53438fbfd1da6..d3458c0fbb7ac9284a423a75dd45f97bcc589b84 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ build dist draft .vscode +.coverage public*/ *venv/ +*.xml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bdb8804917beb7397f8bc78da33479b259a53e0c..170300aecee6b689b0c8734feeb225b048aab057 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,6 +10,7 @@ variables: IMPL_MODULE_NAME: theia_dumper PYTHON_IMG: python:3.12-slim + PACKAGE_INSTALL_EXTRAS: "[test]" DOC_BUILD_SELF: true @@ -36,16 +37,24 @@ Test Upload: DINAMIS_SDK_SECRET_KEY=${CI_VAR_DINAMIS_SDK_SECRET_KEY} python tests/test_upload.py -Test Get: +Pytest: extends: .static_analysis_with_pip_install stage: Test except: [main] script: - - python tests/test_get.py - -Test Diff: - extends: .static_analysis_with_pip_install - stage: Test - except: [main] - script: - - python tests/test_diff.py + - DINAMIS_SDK_ACCESS_KEY=${CI_VAR_DINAMIS_SDK_ACCESS_KEY} + DINAMIS_SDK_SECRET_KEY=${CI_VAR_DINAMIS_SDK_SECRET_KEY} + coverage run -m pytest -rsv --junit-xml=report.xml + - coverage report + - coverage xml + - coverage html + coverage: '/^TOTAL.+?(\d+\%)$/' + artifacts: + paths: + - htmlcov/ + when: always + reports: + coverage_report: + coverage_format: cobertura + path: coverage.xml + junit: report.xml diff --git a/pyproject.toml b/pyproject.toml index 05c6645e83b9da64c6e571b8803c30138e3e4e25..a23c8e67abc050da05be42cf86bb164becedce46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,8 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ] +[project.optional-dependencies] +test = ["pytest", "coverage"] [project.scripts] theia_dumper = "theia_dumper.cli:theia_dumper" @@ -44,7 +46,7 @@ pretty = true exclude = ["doc", "venv", ".venv"] [tool.pylint] -disable = "W1203,R0903,E0401,W0622,C0116,C0115,W0719" +disable = "W1203,R0903,E0401,W0622,C0116,C0115,W0718,W0719" [tool.pylint.MASTER] ignore-paths = '^.venv' diff --git a/tests/test_diff.py b/tests/test_diff.py index 3e0aebb0d04109fa25a1a2a7e6b3372c07643e6d..33e3af581f19902bb6e6171187b526bfa685e881 100755 --- a/tests/test_diff.py +++ b/tests/test_diff.py @@ -1,33 +1,43 @@ """Test file.""" -import test_upload import pystac +import test_upload +import utils + +from theia_dumper import diff, stac + + +def test_diff(): + """Test diff.""" + + utils.set_secret_key_env() -from theia_dumper import stac, diff + col1, items = test_upload.create_items_and_collection( + relative=True, col_href="/tmp/collection.json" + ) + col2 = col1.full_copy() + item = items[0].full_copy() + item.id += "_test" + col2.add_item(item, item.id) -col1, items = test_upload.create_items_and_collection( - relative=True, col_href="/tmp/collection.json" -) -col2 = col1.full_copy() + item = items[0].full_copy() + item.id += "_test_other" + col1.add_item(item, item.id) -item = items[0].full_copy() -item.id += "_test" -col2.add_item(item, item.id) + diff.generate_items_diff(col1, col2) + diff.collections_defs_are_different(col1, col2) -item = items[0].full_copy() -item.id += "_test_other" -col1.add_item(item, item.id) + col1_filepath = "/tmp/col1.json" + col1.set_self_href(col1_filepath) + col1.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED) -diff.generate_items_diff(col1, col2) -diff.collections_defs_are_different(col1, col2) + diff.compare_local_and_upstream( + stac.StacTransactionsHandler(stac.DEFAULT_STAC_EP), + col1_filepath, + "costarica-sentinel-2-l3-seasonal-spectral-indices-M", + ) -COL1_FILEPATH = "/tmp/col1.json" -col1.set_self_href(COL1_FILEPATH) -col1.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED) -diff.compare_local_and_upstream( - stac.StacTransactionsHandler(stac.DEFAULT_STAC_EP), - COL1_FILEPATH, - "costarica-sentinel-2-l3-seasonal-spectral-indices-M", -) +if __name__ == "__main__": + test_diff() diff --git a/tests/test_get.py b/tests/test_get.py index 652169c1b0ae095eaf0cb447aca561ff3a1aba74..3441da72f02382769443503d8de938949aa50d7a 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -1,16 +1,25 @@ """Test file.""" -from theia_dumper import stac, cli +import utils +from theia_dumper import cli, stac -handler = stac.StacTransactionsHandler( - stac_endpoint=cli.DEFAULT_STAC_EP, -) -REMOTE_COL_ID = "spot-6-7-drs" -handler.list_collections_display() -handler.list_col_items_display(REMOTE_COL_ID) +def test_get(): + """Test get.""" + utils.set_secret_key_env() + handler = stac.StacTransactionsHandler( + stac_endpoint=cli.DEFAULT_STAC_EP, + ) -col_remote = handler.get_remote_col(REMOTE_COL_ID) + remote_col_id = "spot-6-7-drs" + handler.list_collections_display() + handler.list_col_items_display(remote_col_id) -col_items = handler.list_col_items(REMOTE_COL_ID) + handler.get_remote_col(remote_col_id) + + handler.list_col_items(remote_col_id) + + +if __name__ == "__main__": + test_get() diff --git a/tests/test_upload.py b/tests/test_upload.py index 35e37994b6330174253d805bcccbe33f8d50a149..972864252a71d2a2aae2e495b143a7d235d30290 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -8,9 +8,12 @@ from datetime import datetime import pystac import pystac_client import requests +import utils +import pytest from theia_dumper import stac +utils.set_secret_key_env() DEFAULT_COL_HREF = "http://hello.fr/collections/collection-for-tests" STAC_EP = "https://stacapi-cdos.apps.okd.crocc.meso.umontpellier.fr" @@ -21,7 +24,8 @@ IMAGE_HREF = ( COL_ID = "collection-for-theia-dumper-tests" items_ids = ["item_1", "item_2"] RASTER_FILE1 = "/tmp/raster1.tif" -RASTER_FILE2 = "/tmp/folder/raster2.tif" +RASTER_FILE2 = "/tmp/folder1/raster2.tif" +RASTER_FILE3 = "/tmp/folder/raster3.tif" handler = stac.StacUploadTransactionsHandler( stac_endpoint=STAC_EP, @@ -35,6 +39,8 @@ with open(RASTER_FILE1, "wb") as f: f.write(r.content) os.makedirs(os.path.dirname(RASTER_FILE2), exist_ok=True) shutil.copyfile(RASTER_FILE1, RASTER_FILE2) +os.makedirs(os.path.dirname(RASTER_FILE3), exist_ok=True) +shutil.copyfile(RASTER_FILE1, RASTER_FILE3) COL_BBOX = [0.0, 0.0, 0.0, 0.0] BBOX_ALL = [ @@ -97,6 +103,7 @@ def create_item(item_id: str): assets={ "ndvi": pystac.Asset(href=RASTER_FILE1), "crswir": pystac.Asset(href=RASTER_FILE2), + "ndwi": pystac.Asset(href=RASTER_FILE3), }, ) @@ -155,53 +162,57 @@ def generate_item_collection(file_pth, relative=True): icol.save_object(file_pth) -def test_item_collection(): +@pytest.mark.parametrize("assets_overwrite", [False, True]) +@pytest.mark.parametrize("relative", [False, True]) +def test_item_collection(assets_overwrite, relative): """Test item collection.""" - for relative in [True, False]: - print(f"Relative: {relative}") - # we need to create an empty collection before - col = create_collection(DEFAULT_COL_HREF) - handler.publish_collection(collection=col) + handler.assets_overwrite = assets_overwrite + # we need to create an empty collection before + col = create_collection(DEFAULT_COL_HREF) + handler.publish_collection(collection=col) - with tempfile.NamedTemporaryFile() as tmp: - generate_item_collection(tmp.name, relative=relative) - handler.load_and_publish(tmp.name) - remote_col_test(BBOX_ALL) - clear() + with tempfile.NamedTemporaryFile() as tmp: + generate_item_collection(tmp.name, relative=relative) + handler.load_and_publish(tmp.name) + remote_col_test(BBOX_ALL) + clear() -def test_collection(): +@pytest.mark.parametrize("assets_overwrite", [False, True]) +@pytest.mark.parametrize("relative", [False, True]) +def test_collection(assets_overwrite, relative): """Test collection.""" - for relative in [True, False]: - print(f"\nRelative: {relative}") - with tempfile.TemporaryDirectory() as tmpdir: - generate_collection(tmpdir, relative=relative) - handler.load_and_publish(os.path.join(tmpdir, "collection.json")) - remote_col_test(BBOX_ALL) - clear() + handler.assets_overwrite = assets_overwrite + with tempfile.TemporaryDirectory() as tmpdir: + generate_collection(tmpdir, relative=relative) + handler.load_and_publish(os.path.join(tmpdir, "collection.json")) + remote_col_test(BBOX_ALL) + clear() -def test_collection_multipart(): +@pytest.mark.parametrize("assets_overwrite", [False, True]) +@pytest.mark.parametrize("relative", [False, True]) +def test_collection_multipart(assets_overwrite, relative): """Test collection.""" - for relative in [True, False]: - print(f"\nRelative: {relative}") - for item_id in items_ids: - with tempfile.TemporaryDirectory() as tmpdir: - generate_collection( - tmpdir, relative=relative, items=[create_item(item_id)] - ) - handler.load_and_publish(os.path.join(tmpdir, "collection.json")) - remote_col_test(BBOX_ALL) - clear() + print(f"\nRelative: {relative}") + handler.assets_overwrite = assets_overwrite + for item_id in items_ids: + with tempfile.TemporaryDirectory() as tmpdir: + generate_collection(tmpdir, relative=relative, items=[create_item(item_id)]) + handler.load_and_publish(os.path.join(tmpdir, "collection.json")) + remote_col_test(BBOX_ALL) + clear() def _test_all(): - test_collection() - - test_item_collection() + for relative in [False, True]: + for assets_overwrite in [False, True]: + handler.assets_overwrite = assets_overwrite - test_collection_multipart() + test_collection(assets_overwrite, relative) + test_item_collection(assets_overwrite, relative) + test_collection_multipart(assets_overwrite, relative) if __name__ == "__main__": diff --git a/tests/utils.py b/tests/utils.py new file mode 100755 index 0000000000000000000000000000000000000000..cd3867a8a91a853b53d4eac3253ce4288f481611 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,18 @@ +"""Utils file.""" + +import os + +import dinamis_sdk +import dinamis_sdk.settings + + +def set_secret_key_env(): + """Test diff.""" + if "DINAMIS_SDK_ACCESS_KEY" in os.environ: + dinamis_sdk.settings.ENV.dinamis_sdk_access_key = os.environ.get( + "DINAMIS_SDK_ACCESS_KEY" + ) + if "DINAMIS_SDK_SECRET_KEY" in os.environ: + dinamis_sdk.settings.ENV.dinamis_sdk_secret_key = os.environ.get( + "DINAMIS_SDK_SECRET_KEY" + ) diff --git a/theia_dumper/__init__.py b/theia_dumper/__init__.py index 9824c4c6fe8788bc6df47ffc17e3d475478458ab..4ffdd6069f04aa7dc2e0059ca09d90ca8b0cfc70 100644 --- a/theia_dumper/__init__.py +++ b/theia_dumper/__init__.py @@ -1,3 +1,3 @@ """Theia dumper package.""" -__version__ = "0.1.1" +__version__ = "0.1.2" diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index e33c557fbaae4a636573cff01cf0fe130639a889..24f1faabe62e4f5d128150c16e3f951fb6edeea8 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -13,6 +13,7 @@ import pystac_client import requests from pystac import Collection, Item, ItemCollection from requests.adapters import HTTPAdapter, Retry +from rich.pretty import pretty_repr from .logger import logger @@ -42,26 +43,19 @@ def create_session(): """Create a requests session.""" sess = requests.Session() retries = Retry( - total=5, + total=3, backoff_factor=1, status_forcelist=[ - 400, - 403, 408, - 410, 419, - 421, - 422, - 424, 425, - 429, 500, 502, 503, 504, - 505, ], allowed_methods=frozenset(["PUT", "POST"]), + raise_on_status=False, ) adapter = HTTPAdapter(max_retries=retries) sess.mount("http://", adapter=adapter) @@ -69,23 +63,30 @@ def create_session(): return sess -def asset_exists(url: str) -> bool: - """Check that the item provided in parameter exists and is accessible.""" +def asset_exists(asset_url: str) -> None | str: + """Check that the item provided in parameter exists and is accessible. + + If asset exists, returns asset url + """ sess = create_session() - res = sess.get(dinamis_sdk.sign(url), stream=True) + asset_url_signed = dinamis_sdk.sign(asset_url) + res = sess.get(asset_url_signed, stream=True) if res.status_code == 200: - logger.info("Asset %s already exists. Skipping.", url) - return True - return False + logger.info("Asset %s already exists.", asset_url) + return asset_url + return None def post_or_put(url: str, data: dict): """Post or put data to url.""" headers = dinamis_sdk.get_headers() sess = create_session() + resp = sess.post(url, json=data, headers=headers, timeout=10) + if resp.status_code == 409: # Exists, so update + logger.info(f"Item at {url} already exists, doing a PUT") resp = sess.put( f"{url}/{data['id']}", json=data, @@ -99,7 +100,10 @@ def post_or_put(url: str, data: dict): try: resp.raise_for_status() except Exception as e: - logger.error("Server returned: %s", resp.text) + try: + logger.error("Server returned: %s", pretty_repr(resp.json())) + except Exception: + logger.error("Server returned: %s", resp.text) raise e @@ -127,12 +131,12 @@ def get_assets_root_dir(items: List[Item]) -> str: If the the common prefix is not a folder (/tmp/test1/a.tif, /tmp/test2/b.tif), returns /tmp. """ - prefix = os.path.commonprefix( + prefix = os.path.commonpath( [asset.href for item in items for asset in item.assets.values()] ) if os.path.isdir(prefix): - return prefix - return os.path.dirname(prefix) + return prefix + "/" + return os.path.dirname(prefix) + "/" def check_items_collection_id(items: List[Item]): @@ -259,11 +263,13 @@ class StacUploadTransactionsHandler(StacTransactionsHandler): local_filename = asset.href logger.debug("Local file: %s", local_filename) - target_url = local_filename.replace(assets_root_dir, tgt_bucket_root_url) + file_relative_path = local_filename.replace(assets_root_dir, "") + target_url = urljoin(tgt_bucket_root_url, file_relative_path) + print(target_url) # Check that url part after storage bucket is compliant _check_naming_is_compliant( - target_url.replace(tgt_bucket_root_url, ""), + file_relative_path, allow_dot=True, allow_slash=True, ) @@ -271,7 +277,8 @@ class StacUploadTransactionsHandler(StacTransactionsHandler): # Skip when target file exists and overwrite is not enabled if not self.assets_overwrite: - if asset_exists(target_url): + if asset_href := asset_exists(target_url): + asset.href = asset_href continue # Upload file