From ad79886bbd1fff4af428584933da75183e852d92 Mon Sep 17 00:00:00 2001 From: Remi Cresson <remi.cresson@inrae.fr> Date: Wed, 21 Jun 2023 10:27:42 +0200 Subject: [PATCH 1/5] FIX: #8 (trop d'url a signer) --- dinamis_sdk/s3.py | 57 ++++++++++++++++++++++++-------------- doc/processing_examples.md | 2 +- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/dinamis_sdk/s3.py b/dinamis_sdk/s3.py index 9545c3b..730494c 100644 --- a/dinamis_sdk/s3.py +++ b/dinamis_sdk/s3.py @@ -22,9 +22,11 @@ from pystac.serialization.identify import identify_stac_object_type from pystac.utils import datetime_to_str import pystac_client from pystac_client import ItemSearch +import math from .utils import log, SIGNED_URL_TTL_MARGIN, CREDENTIALS +MAX_URLS = 64 S3_STORAGE_DOMAIN = "minio-api-dinamis.apps.okd.crocc.meso.umontpellier.fr" S3_SIGNING_ENDPOINT = \ "https://s3-signing-dinamis.apps.okd.crocc.meso.umontpellier.fr/" @@ -471,28 +473,41 @@ def get_signed_urls( adapter = requests.adapters.HTTPAdapter(max_retries=retry) session.mount("http://", adapter) session.mount("https://", adapter) - response = session.post( - f"{S3_SIGNING_ENDPOINT}sign_urls", - params={"urls": not_signed_urls}, - headers=headers - ) - response.raise_for_status() - - signed_url_batch = SignedURLBatch(**response.json()) - if not signed_url_batch: - raise ValueError( - f"No signed url batch found in response: {response.json()}" - ) - if not all(key in signed_url_batch.hrefs - for key in not_signed_urls): - raise ValueError( - f"URLs to sign are {not_signed_urls} but returned signed URLs" - f"are for {signed_url_batch.hrefs.keys()}" + n_urls = len(not_signed_urls) + log.debug("Number of URLs to sign: %s", n_urls) + n_chunks = math.ceil(n_urls / MAX_URLS) + log.debug("Number of chunks of URLs to sign: %s", n_chunks) + for i_chunk in range(n_chunks): + log.debug("Processing chunk %s/%s", i_chunk + 1, n_chunks) + chunk_start = i_chunk * MAX_URLS + chunk_end = min(chunk_start + MAX_URLS, n_urls) + not_signed_urls_chunk = not_signed_urls[chunk_start:chunk_end] + response = session.post( + f"{S3_SIGNING_ENDPOINT}sign_urls", + params={"urls": not_signed_urls_chunk}, + headers=headers ) - for url, href in signed_url_batch.hrefs.items(): - signed_url = SignedURL(expiry=signed_url_batch.expiry, href=href) - CACHE[url] = signed_url - signed_urls[url] = signed_url + response.raise_for_status() + + signed_url_batch = SignedURLBatch(**response.json()) + if not signed_url_batch: + raise ValueError( + f"No signed url batch found in response: {response.json()}" + ) + if not all(key in signed_url_batch.hrefs + for key in not_signed_urls_chunk): + raise ValueError( + f"URLs to sign are {not_signed_urls_chunk} but returned " + f"signed URLs" + f"are for {signed_url_batch.hrefs.keys()}" + ) + for url, href in signed_url_batch.hrefs.items(): + signed_url = SignedURL( + expiry=signed_url_batch.expiry, + href=href + ) + CACHE[url] = signed_url + signed_urls[url] = signed_url log.debug( "Got signed urls %s in %s seconds", signed_urls, diff --git a/doc/processing_examples.md b/doc/processing_examples.md index e2c719d..1199ed2 100644 --- a/doc/processing_examples.md +++ b/doc/processing_examples.md @@ -5,7 +5,7 @@ To process remote COG files, the following software must be up-to-date: | Software | Minimum version | |----------|-----------------| | GDAL | 3.4.1 | -| OTB | 8.1.1 | +| OTB | 8.1.2 | | PyOTB | 1.5.4 | All examples begin with importing `pystac_client` and `dinamis_sdk` and -- GitLab From 0b5b605d49b7a4b57e64f32b26895d759ed3fc31 Mon Sep 17 00:00:00 2001 From: Remi Cresson <remi.cresson@inrae.fr> Date: Wed, 21 Jun 2023 10:28:18 +0200 Subject: [PATCH 2/5] Bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ced02f4..43d9d9b 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ install_requires = [ setup( name="dinamis-sdk", - version="0.0.9", + version="0.0.10", description="DINAMIS SDK", python_requires=">=3.8", author="Remi Cresson", -- GitLab From 0bfbd449a03d8ef7b30a58459b4a9ee3867d5f85 Mon Sep 17 00:00:00 2001 From: Remi Cresson <remi.cresson@inrae.fr> Date: Wed, 21 Jun 2023 10:32:06 +0200 Subject: [PATCH 3/5] Import order --- dinamis_sdk/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dinamis_sdk/s3.py b/dinamis_sdk/s3.py index 730494c..e983014 100644 --- a/dinamis_sdk/s3.py +++ b/dinamis_sdk/s3.py @@ -13,6 +13,7 @@ from datetime import datetime, timezone from functools import singledispatch from typing import Any, Dict, Mapping, TypeVar, cast, List from urllib.parse import urlparse, parse_qs +import math import urllib3.util.retry import requests import requests.adapters @@ -22,7 +23,6 @@ from pystac.serialization.identify import identify_stac_object_type from pystac.utils import datetime_to_str import pystac_client from pystac_client import ItemSearch -import math from .utils import log, SIGNED_URL_TTL_MARGIN, CREDENTIALS -- GitLab From 340c561c4ec9065110f64cb9432ce79a03fd855d Mon Sep 17 00:00:00 2001 From: Remi Cresson <remi.cresson@inrae.fr> Date: Wed, 21 Jun 2023 10:42:13 +0200 Subject: [PATCH 4/5] Add: tests --- .gitlab-ci.yml | 15 +++++++++++++-- tests/test_spot-6-7-drs.py | 16 ++++++++++++++++ tests/test_super-s2.py | 15 +++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 tests/test_spot-6-7-drs.py create mode 100644 tests/test_super-s2.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7e0a26c..9bbe990 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,8 +10,8 @@ stages: - Static Analysis - Install - Documentation + - Test - Ship -# - Test # ------------------------------ Static analysis ------------------------------ @@ -75,6 +75,17 @@ pages: script: - mkdocs build --site-dir public +# --------------------------------- Test -------------------------------------- + +Tests: + stage: Test + before_script: + - pip install . + - pip install pystac-client + script: + - python tests/test_spot-6-7-drs.py + - python tests/test_super-s2.py + # --------------------------------- Ship -------------------------------------- pypi: @@ -86,4 +97,4 @@ pypi: script: - python3 -m build after_script: - - python3 -m twine upload --repository-url https://upload.pypi.org/legacy/ --non-interactive -u __token__ -p $pypi_token dist/* \ No newline at end of file + - python3 -m twine upload --repository-url https://upload.pypi.org/legacy/ --non-interactive -u __token__ -p $pypi_token dist/* diff --git a/tests/test_spot-6-7-drs.py b/tests/test_spot-6-7-drs.py new file mode 100644 index 0000000..6d7f925 --- /dev/null +++ b/tests/test_spot-6-7-drs.py @@ -0,0 +1,16 @@ +import dinamis_sdk +import pystac_client + +api = pystac_client.Client.open( + 'https://stacapi-dinamis.apps.okd.crocc.meso.umontpellier.fr', + modifier=dinamis_sdk.sign_inplace, +) +res = api.search( + bbox=[-3.75, 30, 10, 60], + datetime=["2017-01-01", "2022-12-31"], + collections=["spot-6-7-drs"] +) +urls = [item.assets['xs'].href for item in res.items()] +print(len(urls)) +assert len(urls) > 1000 + diff --git a/tests/test_super-s2.py b/tests/test_super-s2.py new file mode 100644 index 0000000..5442f74 --- /dev/null +++ b/tests/test_super-s2.py @@ -0,0 +1,15 @@ +import dinamis_sdk +import pystac_client + +api = pystac_client.Client.open( + 'https://stacapi-dinamis.apps.okd.crocc.meso.umontpellier.fr', + modifier=dinamis_sdk.sign_inplace, +) +res = api.search( + bbox=[3.75, 43.58, 3.95, 43.67], + datetime=["2017-01-01", "2022-12-31"], + collections=["super-sentinel-2-l2a"] +) +urls = [item.assets['img'].href for item in res.items()] +assert len(urls) == 672 + -- GitLab From c7aa747f93050d6def7bfd0daca7c9521ba09bb8 Mon Sep 17 00:00:00 2001 From: Remi Cresson <remi.cresson@inrae.fr> Date: Wed, 21 Jun 2023 10:47:09 +0200 Subject: [PATCH 5/5] Add: tests --- tests/test_spot-6-7-drs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_spot-6-7-drs.py b/tests/test_spot-6-7-drs.py index 6d7f925..814e232 100644 --- a/tests/test_spot-6-7-drs.py +++ b/tests/test_spot-6-7-drs.py @@ -10,7 +10,7 @@ res = api.search( datetime=["2017-01-01", "2022-12-31"], collections=["spot-6-7-drs"] ) -urls = [item.assets['xs'].href for item in res.items()] +urls = [item.assets['src_xs'].href for item in res.items()] print(len(urls)) assert len(urls) > 1000 -- GitLab