From f68fb1f8c18c29e44b0d858f5f6afdc406cf1549 Mon Sep 17 00:00:00 2001 From: jnizet <jb@ninja-squad.com> Date: Thu, 2 Sep 2021 11:57:55 +0200 Subject: [PATCH] feat: implement sitemaps --- .../domain/data/LocationSitemapVO.java | 36 +++++ .../data/germplasm/GermplasmSitemapVO.java | 38 ++++++ .../domain/data/study/StudySitemapVO.java | 40 ++++++ .../elasticsearch/ESScrollIterator.java | 7 + .../document/DocumentAnnotationUtil.java | 3 +- .../document/DocumentMetadata.java | 12 +- .../document/annotation/Document.java | 1 + .../impl/ESGenericFindRepository.java | 7 +- .../repository/es/GermplasmRepository.java | 6 + .../es/GermplasmRepositoryImpl.java | 7 + .../repository/es/LocationRepository.java | 4 + .../repository/es/LocationRepositoryImpl.java | 19 +++ .../repository/es/StudyRepository.java | 4 + .../repository/es/StudyRepositoryImpl.java | 11 ++ .../fr/inra/urgi/faidare/utils/Sitemaps.java | 125 ++++++++++++++++++ .../web/germplasm/GermplasmController.java | 36 ++++- .../urgi/faidare/web/site/SiteController.java | 25 ++++ .../web/sitemap/SitemapIndexController.java | 57 ++++++++ .../faidare/web/study/StudyController.java | 28 ++++ backend/src/main/resources/application.yml | 1 + .../document/DocumentAnnotationUtilTest.java | 1 + .../document/fixture/ComplexDocument.java | 2 +- .../es/GermplasmRepositoryTest.java | 9 ++ .../repository/es/LocationRepositoryTest.java | 12 ++ .../repository/es/StudyRepositoryTest.java | 10 ++ .../germplasm/GermplasmControllerTest.java | 89 +++++++++++++ .../faidare/web/site/SiteControllerTest.java | 62 +++++++++ .../sitemap/SitemapIndexControllerTest.java | 34 +++++ .../web/study/StudyControllerTest.java | 97 ++++++++++++++ 29 files changed, 775 insertions(+), 8 deletions(-) create mode 100644 backend/src/main/java/fr/inra/urgi/faidare/domain/data/LocationSitemapVO.java create mode 100644 backend/src/main/java/fr/inra/urgi/faidare/domain/data/germplasm/GermplasmSitemapVO.java create mode 100644 
backend/src/main/java/fr/inra/urgi/faidare/domain/data/study/StudySitemapVO.java create mode 100644 backend/src/main/java/fr/inra/urgi/faidare/utils/Sitemaps.java create mode 100644 backend/src/main/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexController.java create mode 100644 backend/src/test/java/fr/inra/urgi/faidare/web/germplasm/GermplasmControllerTest.java create mode 100644 backend/src/test/java/fr/inra/urgi/faidare/web/site/SiteControllerTest.java create mode 100644 backend/src/test/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexControllerTest.java create mode 100644 backend/src/test/java/fr/inra/urgi/faidare/web/study/StudyControllerTest.java diff --git a/backend/src/main/java/fr/inra/urgi/faidare/domain/data/LocationSitemapVO.java b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/LocationSitemapVO.java new file mode 100644 index 00000000..1ee34df2 --- /dev/null +++ b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/LocationSitemapVO.java @@ -0,0 +1,36 @@ +package fr.inra.urgi.faidare.domain.data; + +import java.util.List; + +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiAdditionalInfo; +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiLocation; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURI; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURL; +import fr.inra.urgi.faidare.domain.jsonld.data.IncludedInDataCatalog; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Document; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Id; + +/** + * A minimal view of a location containing only its ID, used to generate sitemaps + */ +@Document(type = "location", includedFields = "locationDbId") +public class LocationSitemapVO { + + @Id + private String locationDbId; + + public LocationSitemapVO() { + } + + public LocationSitemapVO(String locationDbId) { + this.locationDbId = locationDbId; + } + + public String getLocationDbId() { + return locationDbId; + } + + public void setLocationDbId(String 
locationDbId) { + this.locationDbId = locationDbId; + } +} diff --git a/backend/src/main/java/fr/inra/urgi/faidare/domain/data/germplasm/GermplasmSitemapVO.java b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/germplasm/GermplasmSitemapVO.java new file mode 100644 index 00000000..b7dc2681 --- /dev/null +++ b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/germplasm/GermplasmSitemapVO.java @@ -0,0 +1,38 @@ +package fr.inra.urgi.faidare.domain.data.germplasm; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonSetter; +import com.fasterxml.jackson.annotation.Nulls; +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiGermplasm; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURI; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURL; +import fr.inra.urgi.faidare.domain.jsonld.data.IncludedInDataCatalog; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Document; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Id; + +/** + * A minimal view of a germplasm, containing only its ID, used for sitemaps + */ +@Document(type = "germplasm", includedFields = "germplasmDbId") +public class GermplasmSitemapVO { + + @Id + private String germplasmDbId; + + public GermplasmSitemapVO() { + } + + public GermplasmSitemapVO(String germplasmDbId) { + this.germplasmDbId = germplasmDbId; + } + + public String getGermplasmDbId() { + return germplasmDbId; + } + + public void setGermplasmDbId(String germplasmDbId) { + this.germplasmDbId = germplasmDbId; + } +} diff --git a/backend/src/main/java/fr/inra/urgi/faidare/domain/data/study/StudySitemapVO.java b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/study/StudySitemapVO.java new file mode 100644 index 00000000..c4900361 --- /dev/null +++ b/backend/src/main/java/fr/inra/urgi/faidare/domain/data/study/StudySitemapVO.java @@ -0,0 +1,40 @@ +package fr.inra.urgi.faidare.domain.data.study; + +import java.util.Date; +import java.util.List; 
+import java.util.Set; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiAdditionalInfo; +import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiStudySummary; +import fr.inra.urgi.faidare.domain.data.GnpISInternal; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURI; +import fr.inra.urgi.faidare.domain.jsonld.data.HasURL; +import fr.inra.urgi.faidare.domain.jsonld.data.IncludedInDataCatalog; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Document; +import fr.inra.urgi.faidare.elasticsearch.document.annotation.Id; + +/** + * A minimal view of a study containing only its ID, used to generate sitemaps + */ +@Document(type = "study", includedFields = "studyDbId") +public class StudySitemapVO { + + @Id + private String studyDbId; + + public StudySitemapVO() { + } + + public StudySitemapVO(String studyDbId) { + this.studyDbId = studyDbId; + } + + public String getStudyDbId() { + return studyDbId; + } + + public void setStudyDbId(String studyDbId) { + this.studyDbId = studyDbId; + } +} diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/ESScrollIterator.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/ESScrollIterator.java index f106e9b7..04b8d4e2 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/ESScrollIterator.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/ESScrollIterator.java @@ -68,6 +68,13 @@ public class ESScrollIterator<T> implements Iterator<T> { .size(fetchSize) .sort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC); + // Add included and excluded fields if requested + String[] includedFields = documentMetadata.getIncludedFields(); + String[] excludedFields = documentMetadata.getExcludedFields(); + if ((includedFields != null && includedFields.length >= 1) || (excludedFields != null && excludedFields.length >= 1)) { + request.source().fetchSource(includedFields, excludedFields); + } + SearchResponse response = 
null; try { response = client.search(request, RequestOptions.DEFAULT); diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtil.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtil.java index a521970d..eb63d3e3 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtil.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtil.java @@ -54,8 +54,9 @@ public class DocumentAnnotationUtil { Map<String, DocumentMetadata.Field> fields = findDocumentFields(ImmutableList.<String>of(), valueObjectClass); + String[] includedFields = document.includedFields(); String[] excludedFields = document.excludedFields(); - metadata = new DocumentMetadata<>(documentType, idFieldName, valueObjectClass, excludedFields, fields); + metadata = new DocumentMetadata<>(documentType, idFieldName, valueObjectClass, includedFields, excludedFields, fields); metadataCache.put(valueObjectClass, metadata); } return metadata; diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentMetadata.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentMetadata.java index 6bf7389b..a54e37e3 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentMetadata.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentMetadata.java @@ -17,15 +17,21 @@ public class DocumentMetadata<VO> { private final String documentType; private final String idField; private final Class<VO> documentClass; + private final String[] includedFields; private final String[] excludedFields; private final Map<String, Field> fieldsByName; private final Map<List<String>, Field> fieldByPath; - public DocumentMetadata(String documentType, String idField, Class<VO> documentClass, String[] excludedFields, + public DocumentMetadata(String documentType, + String 
idField, + Class<VO> documentClass, + String[] includedFields, + String[] excludedFields, Map<String, Field> fieldsByName) { this.documentType = documentType; this.idField = idField; this.documentClass = documentClass; + this.includedFields = includedFields; this.excludedFields = excludedFields; this.fieldsByName = fieldsByName; this.fieldByPath = flattenDocumentFieldTree(ImmutableList.<String>of(), fieldsByName); @@ -57,6 +63,10 @@ public class DocumentMetadata<VO> { return idField; } + public String[] getIncludedFields() { + return includedFields; + } + public String[] getExcludedFields() { return excludedFields; } diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/annotation/Document.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/annotation/Document.java index df4af121..3d0585a4 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/annotation/Document.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/document/annotation/Document.java @@ -13,5 +13,6 @@ import java.lang.annotation.Target; public @interface Document { String type(); + String[] includedFields() default {}; String[] excludedFields() default {}; } diff --git a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/repository/impl/ESGenericFindRepository.java b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/repository/impl/ESGenericFindRepository.java index 54e7b287..e1122d86 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/repository/impl/ESGenericFindRepository.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/elasticsearch/repository/impl/ESGenericFindRepository.java @@ -90,10 +90,11 @@ public class ESGenericFindRepository<C extends PaginationCriteria, VO> implement request.source().sort(field, order); } - // Add excluded fields if requested + // Add included and excluded fields if requested + String[] includedFields = documentMetadata.getIncludedFields(); String[] 
excludedFields = documentMetadata.getExcludedFields(); - if (excludedFields != null && excludedFields.length >= 1) { - request.source().fetchSource(null, excludedFields); + if ((includedFields != null && includedFields.length >= 1) || (excludedFields != null && excludedFields.length >= 1)) { + request.source().fetchSource(includedFields, excludedFields); } Logger logger = LoggerFactory.getLogger(ESGenericFindRepository.class); diff --git a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepository.java b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepository.java index dafff525..739feea0 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepository.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepository.java @@ -3,6 +3,7 @@ package fr.inra.urgi.faidare.repository.es; import fr.inra.urgi.faidare.domain.criteria.FaidareGermplasmPOSTShearchCriteria; import fr.inra.urgi.faidare.domain.criteria.GermplasmSearchCriteria; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmMcpdVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmVO; import fr.inra.urgi.faidare.domain.data.germplasm.PedigreeVO; import fr.inra.urgi.faidare.domain.data.germplasm.ProgenyVO; @@ -31,6 +32,11 @@ public interface GermplasmRepository */ GermplasmVO getById(String germplasmDbId); + /** + * Scroll through all germplasms, using the given fetch size + */ + Iterator<GermplasmSitemapVO> scrollAllForSitemap(int fetchSize); + /** * Scroll through all germplasm matching the given criteria. 
*/ diff --git a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryImpl.java b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryImpl.java index 31a6e1a8..360087ce 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryImpl.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryImpl.java @@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import fr.inra.urgi.faidare.domain.criteria.FaidareGermplasmPOSTShearchCriteria; import fr.inra.urgi.faidare.domain.criteria.GermplasmSearchCriteria; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmMcpdVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmVO; import fr.inra.urgi.faidare.domain.data.germplasm.PedigreeVO; import fr.inra.urgi.faidare.domain.data.germplasm.ProgenyVO; @@ -90,6 +91,12 @@ public class GermplasmRepositoryImpl implements GermplasmRepository { this.criteriaMapping = AnnotatedCriteriaMapper.getMapping(criteriaClass); } + @Override + public Iterator<GermplasmSitemapVO> scrollAllForSitemap(int fetchSize) { + QueryBuilder query = QueryBuilders.matchAllQuery(); + return new ESScrollIterator<>(client, requestFactory, parser, GermplasmSitemapVO.class, query, fetchSize); + } + @Override public Iterator<GermplasmVO> scrollAll(GermplasmSearchCriteria criteria) { QueryBuilder query = queryFactory.createQuery(criteria); diff --git a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepository.java b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepository.java index 707f51bc..93e65b54 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepository.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepository.java @@ -1,6 +1,9 @@ package fr.inra.urgi.faidare.repository.es; +import java.util.Iterator; + import 
fr.inra.urgi.faidare.domain.criteria.LocationCriteria; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.LocationVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.elasticsearch.repository.ESFindRepository; @@ -21,4 +24,5 @@ public interface LocationRepository @Override PaginatedList<LocationVO> find(LocationCriteria criteria); + Iterator<LocationSitemapVO> scrollAllForSitemap(int fetchSize); } diff --git a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryImpl.java b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryImpl.java index af74a55c..0a1133d7 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryImpl.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryImpl.java @@ -1,11 +1,18 @@ package fr.inra.urgi.faidare.repository.es; +import java.util.Iterator; + import fr.inra.urgi.faidare.domain.criteria.LocationCriteria; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.LocationVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.elasticsearch.ESRequestFactory; import fr.inra.urgi.faidare.elasticsearch.ESResponseParser; +import fr.inra.urgi.faidare.elasticsearch.ESScrollIterator; import fr.inra.urgi.faidare.elasticsearch.repository.impl.BaseESRepository; import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; @@ -17,6 +24,10 @@ public class LocationRepositoryImpl extends BaseESRepository<LocationCriteria, LocationVO> implements LocationRepository { + private final RestHighLevelClient client; + private final ESRequestFactory requestFactory; + private 
final ESResponseParser parser; + @Autowired public LocationRepositoryImpl( RestHighLevelClient client, @@ -24,6 +35,14 @@ public class LocationRepositoryImpl ESResponseParser parser ) { super(client, requestFactory, LocationVO.class, parser); + this.client = client; + this.requestFactory = requestFactory; + this.parser = parser; } + @Override + public Iterator<LocationSitemapVO> scrollAllForSitemap(int fetchSize) { + QueryBuilder query = QueryBuilders.matchAllQuery(); + return new ESScrollIterator<>(client, requestFactory, parser, LocationSitemapVO.class, query, fetchSize); + } } diff --git a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepository.java b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepository.java index 6ceeca9f..8831a8c1 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepository.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepository.java @@ -1,12 +1,15 @@ package fr.inra.urgi.faidare.repository.es; import fr.inra.urgi.faidare.domain.criteria.StudyCriteria; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.study.StudyDetailVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; import fr.inra.urgi.faidare.domain.data.study.StudySummaryVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.elasticsearch.repository.ESFindRepository; import fr.inra.urgi.faidare.elasticsearch.repository.ESGetByIdRepository; +import java.util.Iterator; import java.util.Set; /** @@ -29,4 +32,5 @@ public interface StudyRepository */ Set<String> getVariableIds(String studyDbId); + Iterator<StudySitemapVO> scrollAllForSitemap(int fetchSize); } diff --git a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryImpl.java b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryImpl.java index d9410a87..46b0b712 100644 --- 
a/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryImpl.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryImpl.java @@ -2,12 +2,15 @@ package fr.inra.urgi.faidare.repository.es; import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiLocation; import fr.inra.urgi.faidare.domain.criteria.StudyCriteria; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.LocationVO; import fr.inra.urgi.faidare.domain.data.study.StudyDetailVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; import fr.inra.urgi.faidare.domain.data.study.StudySummaryVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.elasticsearch.ESRequestFactory; import fr.inra.urgi.faidare.elasticsearch.ESResponseParser; +import fr.inra.urgi.faidare.elasticsearch.ESScrollIterator; import fr.inra.urgi.faidare.elasticsearch.document.DocumentAnnotationUtil; import fr.inra.urgi.faidare.elasticsearch.document.DocumentMetadata; import fr.inra.urgi.faidare.elasticsearch.query.impl.ESGenericQueryFactory; @@ -19,6 +22,8 @@ import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.slf4j.Logger; @@ -28,6 +33,7 @@ import org.springframework.stereotype.Repository; import java.io.IOException; import java.util.Arrays; +import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; @@ -129,4 +135,9 @@ public class StudyRepositoryImpl return new LinkedHashSet<>(ids); } + @Override + public 
Iterator<StudySitemapVO> scrollAllForSitemap(int fetchSize) {
+        QueryBuilder query = QueryBuilders.matchAllQuery();
+        return new ESScrollIterator<>(client, requestFactory, parser, StudySitemapVO.class, query, fetchSize);
+    }
 }
diff --git a/backend/src/main/java/fr/inra/urgi/faidare/utils/Sitemaps.java b/backend/src/main/java/fr/inra/urgi/faidare/utils/Sitemaps.java
new file mode 100644
index 00000000..ba32b8cf
--- /dev/null
+++ b/backend/src/main/java/fr/inra/urgi/faidare/utils/Sitemaps.java
@@ -0,0 +1,125 @@
+package fr.inra.urgi.faidare.utils;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.util.Iterator;
+import java.util.Spliterators;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Component;
+import org.springframework.web.servlet.support.ServletUriComponentsBuilder;
+
+/**
+ * A generator of site maps: writes one absolute URL per line and logs when a sitemap nears or exceeds 50,000 entries or 50 MB.
+ * @author JB Nizet
+ */
+@Component
+public class Sitemaps {
+    public static final int BUCKET_COUNT = 11;
+
+    public static <T> void generateSitemap(String sitemapPath,
+                                           OutputStream out,
+                                           Iterator<T> entryIterator,
+                                           Predicate<T> entryPredicate,
+                                           Function<T, String> entryToPath) {
+        SanityChecker sanityChecker = new SanityChecker(sitemapPath);
+
+        Writer writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
+        Stream<T> entries =
+            StreamSupport.stream(Spliterators.spliteratorUnknownSize(entryIterator, 0), false);
+        entries.filter(entryPredicate)
+               .map(entryToPath)
+               .map(entryPath -> Sitemaps.generateSitemapUrl(entryPath) + '\n')
+               .forEach(entry -> {
+                   try {
+                       writer.write(entry);
+                       sanityChecker.addEntry(entry);
+                   }
+                   catch (IOException e) {
+                       throw new UncheckedIOException(e);
+                   }
+               });
+
+        try {
+            writer.flush();
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+
+        sanityChecker.check();
+    }
+
+    public static String generateSitemapUrl(String path) {
+        return ServletUriComponentsBuilder
+            .fromCurrentContextPath()
+            .path(path)
+            .toUriString();
+    }
+
+    private static class SanityChecker {
+        private static final Logger LOGGER = LoggerFactory.getLogger(SanityChecker.class);
+
+        private static final int MAX_ENTRY_COUNT = 50_000;
+        private static final int MAX_BYTE_COUNT = 50 * 1024 * 1024;
+
+        private static final int DANGER_ENTRY_COUNT = 40_000;
+        private static final int DANGER_BYTE_COUNT = 40 * 1024 * 1024;
+
+        private final String sitemapPath;
+        private int entryCount = 0;
+        private int byteCount = 0;
+
+        public SanityChecker(String sitemapPath) {
+            this.sitemapPath = sitemapPath;
+        }
+
+        public void addEntry(String entry) {
+            entryCount++;
+            byteCount += entry.length(); // NOTE(review): char count; equals the UTF-8 byte count only for ASCII (percent-encoded) URLs - confirm
+        }
+
+        public void check() {
+            if (entryCount > MAX_ENTRY_COUNT) {
+                LOGGER.error("The generated sitemap at path " +
+                             sitemapPath +
+                             " has more than " +
+                             MAX_ENTRY_COUNT +
+                             " entries and will thus be rejected by search engines. Increase Sitemaps.BUCKET_COUNT for a better distribution of sitemap entries.");
+            } else if (entryCount > DANGER_ENTRY_COUNT) {
+                LOGGER.warn("The generated sitemap at path " +
+                            sitemapPath +
+                            " has more than " +
+                            DANGER_ENTRY_COUNT +
+                            " entries and is thus approaching the max of " +
+                            MAX_ENTRY_COUNT +
+                            ". Increase Sitemaps.BUCKET_COUNT for a better distribution of sitemap entries.");
+            }
+
+            if (byteCount > MAX_BYTE_COUNT) {
+                LOGGER.error("The generated sitemap at path " +
+                             sitemapPath +
+                             " has more than " +
+                             MAX_BYTE_COUNT +
+                             " bytes and will thus be rejected by search engines. Increase Sitemaps.BUCKET_COUNT for a better distribution of sitemap entries.");
+            } else if (byteCount > DANGER_BYTE_COUNT) { // the byte-size warning must be driven by the byte count, not the entry count
+                LOGGER.warn("The generated sitemap at path " +
+                            sitemapPath +
+                            " has more than " +
+                            DANGER_BYTE_COUNT +
+                            " bytes and is thus approaching the max of " +
+                            MAX_BYTE_COUNT +
+                            ". Increase Sitemaps.BUCKET_COUNT for a better distribution of sitemap entries.");
+            }
+        }
+    }
+}
diff --git a/backend/src/main/java/fr/inra/urgi/faidare/web/germplasm/GermplasmController.java b/backend/src/main/java/fr/inra/urgi/faidare/web/germplasm/GermplasmController.java
index cd24d06d..973d2de3 100644
--- a/backend/src/main/java/fr/inra/urgi/faidare/web/germplasm/GermplasmController.java
+++ b/backend/src/main/java/fr/inra/urgi/faidare/web/germplasm/GermplasmController.java
@@ -4,9 +4,16 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Spliterators;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
 
+import javax.servlet.http.HttpServletRequest;
+
+import com.google.common.collect.Streams;
 import fr.inra.urgi.faidare.api.NotFoundException;
 import fr.inra.urgi.faidare.config.FaidareProperties;
 import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiGermplasmAttributeValue;
@@ -18,25 
+25,30 @@ import fr.inra.urgi.faidare.domain.data.germplasm.DonorVO; import fr.inra.urgi.faidare.domain.data.germplasm.GenealogyVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmAttributeValueVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmInstituteVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmVO; import fr.inra.urgi.faidare.domain.data.germplasm.InstituteVO; import fr.inra.urgi.faidare.domain.data.germplasm.PedigreeVO; import fr.inra.urgi.faidare.domain.data.germplasm.PhotoVO; import fr.inra.urgi.faidare.domain.data.germplasm.PuiNameValueVO; import fr.inra.urgi.faidare.domain.data.germplasm.SiblingVO; -import fr.inra.urgi.faidare.domain.data.germplasm.SimpleVO; import fr.inra.urgi.faidare.domain.data.germplasm.SiteVO; import fr.inra.urgi.faidare.domain.data.germplasm.TaxonSourceVO; import fr.inra.urgi.faidare.domain.xref.XRefDocumentVO; import fr.inra.urgi.faidare.repository.es.GermplasmAttributeRepository; import fr.inra.urgi.faidare.repository.es.GermplasmRepository; import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.servlet.ModelAndView; +import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody; /** * Controller used to display a germplasm card based on its ID. 
@@ -49,7 +61,7 @@ public class GermplasmController { private final GermplasmRepository germplasmRepository; private final FaidareProperties faidareProperties; private final XRefDocumentRepository xRefDocumentRepository; - private GermplasmAttributeRepository germplasmAttributeRepository; + private final GermplasmAttributeRepository germplasmAttributeRepository; public GermplasmController(GermplasmRepository germplasmRepository, FaidareProperties faidareProperties, @@ -87,6 +99,26 @@ public class GermplasmController { return toModelAndView(germplasms.get(0)); } + + @GetMapping(value = "/sitemap-{index}.txt") + @ResponseBody + public ResponseEntity<StreamingResponseBody> sitemap(@PathVariable("index") int index) { + if (index < 0 || index >= Sitemaps.BUCKET_COUNT) { + throw new NotFoundException("no sitemap for this index"); + } + StreamingResponseBody body = out -> { + Iterator<GermplasmSitemapVO> iterator = germplasmRepository.scrollAllForSitemap(1000); + Sitemaps.generateSitemap( + "/germplasms/sitemap-" + index + ".txt", + out, + iterator, + vo -> Math.floorMod(vo.getGermplasmDbId().hashCode(), Sitemaps.BUCKET_COUNT) == index, + vo -> "/germplasms/" + vo.getGermplasmDbId() + ); + }; + return ResponseEntity.ok().contentType(MediaType.TEXT_PLAIN).body(body); + } + private ModelAndView toModelAndView(GermplasmVO germplasm) { // List<BrapiGermplasmAttributeValue> attributes = getAttributes(germplasm); // List<XRefDocumentVO> crossReferences = xRefDocumentRepository.find( diff --git a/backend/src/main/java/fr/inra/urgi/faidare/web/site/SiteController.java b/backend/src/main/java/fr/inra/urgi/faidare/web/site/SiteController.java index 151da527..3b05139d 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/web/site/SiteController.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/web/site/SiteController.java @@ -1,20 +1,29 @@ package fr.inra.urgi.faidare.web.site; import java.util.Arrays; +import java.util.Iterator; import java.util.List; +import 
javax.servlet.http.HttpServletRequest; + import fr.inra.urgi.faidare.api.NotFoundException; import fr.inra.urgi.faidare.config.FaidareProperties; import fr.inra.urgi.faidare.domain.brapi.v1.data.BrapiAdditionalInfo; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.LocationVO; import fr.inra.urgi.faidare.domain.xref.XRefDocumentVO; import fr.inra.urgi.faidare.repository.es.LocationRepository; import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.servlet.ModelAndView; +import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody; /** * Controller used to display a site card based on its ID. 
@@ -64,6 +73,22 @@ public class SiteController { ); } + /** Serves the single sitemap of all sites as a streamed text/plain response. When the response body is written, the iterator returned by locationRepository.scrollAllForSitemap(1000) is handed to Sitemaps.generateSitemap together with this sitemap's own path, an accept-all filter (sites are not split into buckets) and a mapper producing "/sites/" + locationDbId. NOTE(review): 1000 is presumably the scroll fetch size - confirm against the repository. */ @GetMapping(value = "/sitemap.txt") + @ResponseBody + public ResponseEntity<StreamingResponseBody> sitemap() { + StreamingResponseBody body = out -> { + Iterator<LocationSitemapVO> iterator = locationRepository.scrollAllForSitemap(1000); + Sitemaps.generateSitemap( + "/sites/sitemap.txt", + out, + iterator, + vo -> true, + vo -> "/sites/" + vo.getLocationDbId() + ); + }; + return ResponseEntity.ok().contentType(MediaType.TEXT_PLAIN).body(body); + } + private LocationVO createSite() { LocationVO site = new LocationVO(); site.setLocationName("France"); diff --git a/backend/src/main/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexController.java b/backend/src/main/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexController.java new file mode 100644 index 00000000..1dd480df --- /dev/null +++ b/backend/src/main/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexController.java @@ -0,0 +1,57 @@ +package fr.inra.urgi.faidare.web.sitemap; + +import java.nio.charset.StandardCharsets; + +import fr.inra.urgi.faidare.utils.Sitemaps; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +/** + * Controller used to generate the sitemap index containing links to the site sitemap + * (unique), the N sitemaps for the studies and the N sitemaps for the germplasms. + * + * Sitemaps for studies and germplasms are split in N buckets because sitemaps + * can't be more than 50 MB and can't have more than 50,000 entries. + * Splitting them in N sitemaps makes it almost sure that none of the sitemaps + * overflows those limits. + * + * The sitemaps are split based on the hashCode of the ID of the document: + * if a document's hash code modulo N is 0, then it's in the sitemap-0.txt, etc. 
+ * + * @author JB Nizet + */ +@RestController +@RequestMapping("") +public class SitemapIndexController { + /** Builds the sitemap index document: an XML declaration, a sitemapindex root element, one entry for "/sites/sitemap.txt", then Sitemaps.BUCKET_COUNT entries for the germplasm sitemaps followed by Sitemaps.BUCKET_COUNT entries for the study sitemaps. The document is returned as UTF-8 bytes with content type text/xml. */ @GetMapping("/sitemap.xml") + public ResponseEntity<byte[]> sitemapIndex() { + StringBuilder builder = new StringBuilder(); + builder + .append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") + .append("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n"); + + appendSiteMap(builder, "/sites/sitemap.txt"); + for (int i = 0; i < Sitemaps.BUCKET_COUNT; i++) { + appendSiteMap(builder, "/germplasms/sitemap-" + i + ".txt"); + } + for (int i = 0; i < Sitemaps.BUCKET_COUNT; i++) { + appendSiteMap(builder, "/studies/sitemap-" + i + ".txt"); + } + builder.append("</sitemapindex>"); + + return ResponseEntity.ok() + .contentType(MediaType.TEXT_XML) + .body(builder.toString().getBytes(StandardCharsets.UTF_8)); + } + + /** Appends one sitemap element to {@code builder}, whose loc child holds the value returned by Sitemaps.generateSitemapUrl(path). NOTE(review): the URL is appended without XML escaping - fine for the fixed paths used by this class, confirm before passing arbitrary paths. */ public void appendSiteMap(StringBuilder builder, String path) { + builder.append(" <sitemap>\n") + .append(" <loc>") + .append(Sitemaps.generateSitemapUrl(path)) + .append("</loc>\n") + .append(" </sitemap>\n"); + } +} diff --git a/backend/src/main/java/fr/inra/urgi/faidare/web/study/StudyController.java b/backend/src/main/java/fr/inra/urgi/faidare/web/study/StudyController.java index 833c4547..e9a9ac46 100644 --- a/backend/src/main/java/fr/inra/urgi/faidare/web/study/StudyController.java +++ b/backend/src/main/java/fr/inra/urgi/faidare/web/study/StudyController.java @@ -3,19 +3,24 @@ package fr.inra.urgi.faidare.web.study; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; +import javax.servlet.http.HttpServletRequest; + import com.google.common.collect.Lists; import fr.inra.urgi.faidare.api.NotFoundException; import fr.inra.urgi.faidare.config.FaidareProperties; import fr.inra.urgi.faidare.domain.criteria.GermplasmPOSTSearchCriteria; +import 
fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.LocationVO; import fr.inra.urgi.faidare.domain.data.TrialVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmVO; import fr.inra.urgi.faidare.domain.data.study.StudyDetailVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; import fr.inra.urgi.faidare.domain.data.variable.ObservationVariableVO; import fr.inra.urgi.faidare.domain.xref.XRefDocumentVO; import fr.inra.urgi.faidare.repository.es.GermplasmRepository; @@ -24,13 +29,18 @@ import fr.inra.urgi.faidare.repository.es.StudyRepository; import fr.inra.urgi.faidare.repository.es.TrialRepository; import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; import fr.inra.urgi.faidare.repository.file.CropOntologyRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; import fr.inra.urgi.faidare.web.site.MapLocation; import org.apache.logging.log4j.util.Strings; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.servlet.ModelAndView; +import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody; /** * Controller used to display a study card based on its ID. 
@@ -104,6 +114,24 @@ public class StudyController { ); } + /** Serves the study sitemap for bucket {@code index} as a streamed text/plain response. Throws NotFoundException when index is outside [0, Sitemaps.BUCKET_COUNT). When the response body is written, the iterator returned by studyRepository.scrollAllForSitemap(1000) is handed to Sitemaps.generateSitemap together with this sitemap's own path ("/studies/sitemap-" + index + ".txt", the same path the sitemap index advertises), a filter keeping only the entries whose Math.floorMod(studyDbId.hashCode(), Sitemaps.BUCKET_COUNT) equals index, and a mapper producing "/studies/" + studyDbId. NOTE(review): 1000 is presumably the scroll fetch size - confirm against the repository. */ @GetMapping(value = "/sitemap-{index}.txt") + @ResponseBody + public ResponseEntity<StreamingResponseBody> sitemap(@PathVariable("index") int index) { + if (index < 0 || index >= Sitemaps.BUCKET_COUNT) { + throw new NotFoundException("no sitemap for this index"); + } + StreamingResponseBody body = out -> { + Iterator<StudySitemapVO> iterator = studyRepository.scrollAllForSitemap(1000); + Sitemaps.generateSitemap( + "/studies/sitemap-" + index + ".txt", + out, + iterator, + vo -> Math.floorMod(vo.getStudyDbId().hashCode(), Sitemaps.BUCKET_COUNT) == index, + vo -> "/studies/" + vo.getStudyDbId()); + }; + return ResponseEntity.ok().contentType(MediaType.TEXT_PLAIN).body(body); + } + private LocationVO getLocation(StudyDetailVO study) { if (Strings.isBlank(study.getLocationDbId())) { return null; diff --git a/backend/src/main/resources/application.yml b/backend/src/main/resources/application.yml index 2b684f60..206012ca 100644 --- a/backend/src/main/resources/application.yml +++ b/backend/src/main/resources/application.yml @@ -86,6 +86,7 @@ server: - text/html - text/css - text/markdown + - text/plain port: 8380 servlet: context-path: /faidare-dev diff --git a/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtilTest.java b/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtilTest.java index b8a86407..a712bfe4 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtilTest.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/DocumentAnnotationUtilTest.java @@ -50,6 +50,7 @@ class DocumentAnnotationUtilTest { .getDocumentObjectMetadata(ComplexDocument.class); assertThat(metadata).isNotNull(); assertThat(metadata.getDocumentType()).isEqualTo("dataObject4"); + assertThat(metadata.getIncludedFields()).containsExactly("id", "nested0"); 
assertThat(metadata.getExcludedFields()).containsExactly("a", "b"); assertThat(metadata.getIdField()).isEqualTo("@id"); diff --git a/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/fixture/ComplexDocument.java b/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/fixture/ComplexDocument.java index dd9a2c78..e799a70a 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/fixture/ComplexDocument.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/elasticsearch/document/fixture/ComplexDocument.java @@ -11,7 +11,7 @@ import java.util.List; /** * @author gcornut */ -@Document(type = "dataObject4", excludedFields = {"a", "b"}) +@Document(type = "dataObject4", includedFields = { "id", "nested0"}, excludedFields = {"a", "b"}) public class ComplexDocument { @Id(jsonName = "@id") String id; diff --git a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryTest.java b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryTest.java index 76285c69..7705ab8a 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryTest.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/GermplasmRepositoryTest.java @@ -6,12 +6,14 @@ import fr.inra.urgi.faidare.Application; import fr.inra.urgi.faidare.domain.criteria.GermplasmGETSearchCriteria; import fr.inra.urgi.faidare.domain.criteria.GermplasmPOSTSearchCriteria; import fr.inra.urgi.faidare.domain.criteria.GermplasmSearchCriteria; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmVO; import fr.inra.urgi.faidare.domain.data.germplasm.PedigreeVO; import fr.inra.urgi.faidare.domain.data.germplasm.ProgenyVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.domain.response.Pagination; import fr.inra.urgi.faidare.repository.es.setup.ESSetUp; +import org.assertj.core.data.Index; import 
org.assertj.core.util.Lists; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -28,6 +30,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Objects; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; @@ -126,6 +129,12 @@ class GermplasmRepositoryTest { assertThat(list).isNotNull().hasSize(0); } + /** Checks that repository.scrollAllForSitemap(100) yields at least one element and that no yielded element has an empty germplasmDbId. */ @Test + void shouldScrollAllForSitemap() { + Iterator<GermplasmSitemapVO> list = repository.scrollAllForSitemap(100); + assertThat(list).isNotEmpty() + .allMatch(vo -> !vo.getGermplasmDbId().isEmpty()); + } @Test void should_Scroll_By_accessionNumber() { diff --git a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryTest.java b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryTest.java index 88bcef7e..d7700556 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryTest.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/LocationRepositoryTest.java @@ -3,7 +3,9 @@ package fr.inra.urgi.faidare.repository.es; import com.google.common.collect.Sets; import fr.inra.urgi.faidare.Application; import fr.inra.urgi.faidare.domain.criteria.LocationCriteria; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; import fr.inra.urgi.faidare.domain.data.LocationVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.repository.es.setup.ESSetUp; import org.junit.jupiter.api.BeforeAll; @@ -16,6 +18,7 @@ import org.springframework.context.annotation.Import; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit.jupiter.SpringExtension; +import java.util.Iterator; import java.util.Set; import static org.assertj.core.api.Assertions.assertThat; @@ -114,4 +117,13 @@ class 
LocationRepositoryTest { assertThat(locations).isNotNull().hasSize(3); assertThat(locations).extracting("locationType").containsOnlyElementsOf(expectedTypes); } + + /** Checks that repository.scrollAllForSitemap(100) yields at least one element and that no yielded element has an empty locationDbId. */ @Test + void shouldScrollAllForSitemap() { + Iterator<LocationSitemapVO> list = repository.scrollAllForSitemap(100); + assertThat(list).isNotEmpty() + .allMatch(vo -> !vo.getLocationDbId().isEmpty()); + } + + } diff --git a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryTest.java b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryTest.java index 42b23287..fc73d3f6 100644 --- a/backend/src/test/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryTest.java +++ b/backend/src/test/java/fr/inra/urgi/faidare/repository/es/StudyRepositoryTest.java @@ -4,7 +4,9 @@ import com.google.common.collect.Sets; import fr.inra.urgi.faidare.Application; import fr.inra.urgi.faidare.domain.criteria.StudySearchCriteria; import fr.inra.urgi.faidare.domain.data.LocationVO; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; import fr.inra.urgi.faidare.domain.data.study.StudyDetailVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; import fr.inra.urgi.faidare.domain.data.study.StudySummaryVO; import fr.inra.urgi.faidare.domain.response.PaginatedList; import fr.inra.urgi.faidare.repository.es.setup.ESSetUp; @@ -20,6 +22,7 @@ import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit.jupiter.SpringExtension; import java.util.Comparator; +import java.util.Iterator; import java.util.Set; import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; @@ -157,6 +160,13 @@ class StudyRepositoryTest { assertThat(result).extracting(sortField).isSortedAccordingTo(new DescendingOrder()); } + /** Checks that repository.scrollAllForSitemap(100) yields at least one element and that no yielded element has an empty studyDbId. */ @Test + void shouldScrollAllForSitemap() { + Iterator<StudySitemapVO> list = repository.scrollAllForSitemap(100); + Assertions.assertThat(list).isNotEmpty() + .allMatch(vo -> !vo.getStudyDbId().isEmpty()); + } 
+ private class DescendingOrder implements Comparator<Object> { @Override public int compare(Object o1, Object o2) { diff --git a/backend/src/test/java/fr/inra/urgi/faidare/web/germplasm/GermplasmControllerTest.java b/backend/src/test/java/fr/inra/urgi/faidare/web/germplasm/GermplasmControllerTest.java new file mode 100644 index 00000000..75f6f3b2 --- /dev/null +++ b/backend/src/test/java/fr/inra/urgi/faidare/web/germplasm/GermplasmControllerTest.java @@ -0,0 +1,89 @@ +package fr.inra.urgi.faidare.web.germplasm; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.when; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.asyncDispatch; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +import java.util.Arrays; +import java.util.List; + +import fr.inra.urgi.faidare.config.FaidareProperties; +import fr.inra.urgi.faidare.domain.data.germplasm.GermplasmSitemapVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; +import fr.inra.urgi.faidare.repository.es.GermplasmAttributeRepository; +import fr.inra.urgi.faidare.repository.es.GermplasmRepository; +import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; +import fr.inra.urgi.faidare.web.study.StudyController; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.MvcResult; + +/** + * MVC tests for {@link GermplasmController} + * @author JB Nizet + */ +@WebMvcTest(GermplasmController.class) 
+public class GermplasmControllerTest { + + @Autowired + private MockMvc mockMvc; + + @MockBean + private GermplasmRepository mockGermplasmRepository; + + @MockBean + private FaidareProperties mockFaidareProperties; + + @MockBean + private XRefDocumentRepository mockXRefDocumentRepository; + + @MockBean + private GermplasmAttributeRepository mockGermplasmAttributeRepository; + + + /** Stubs scrollAllForSitemap to hand out a fresh iterator over four germplasms on every call, then checks the content served for buckets 6 and 9 (two URLs each), for bucket 7 (empty), and that index 17 answers 404. */ @Test + void shouldGenerateSitemap() throws Exception { + List<GermplasmSitemapVO> germplasms = Arrays.asList( + new GermplasmSitemapVO("germplasm1"), + new GermplasmSitemapVO("germplasm4"), + new GermplasmSitemapVO("germplasm45"), + new GermplasmSitemapVO("germplasm73") + ); + + // the hashCode algorithm is specified in the javadoc, so it's guaranteed to be + // the same everywhere + // uncomment the following line to see which sitemap index each germplasm has + // germplasms.forEach(germplasm -> System.out.println(germplasm.getGermplasmDbId() + " = " + Math.floorMod(germplasm.getGermplasmDbId().hashCode(), Sitemaps.BUCKET_COUNT))); + + when(mockGermplasmRepository.scrollAllForSitemap(anyInt())).thenAnswer(invocation -> germplasms.iterator()); + + testSitemap(6, "http://localhost/faidare/germplasms/germplasm1\nhttp://localhost/faidare/germplasms/germplasm45\n"); + testSitemap(9, "http://localhost/faidare/germplasms/germplasm4\nhttp://localhost/faidare/germplasms/germplasm73\n"); + testSitemap(7, ""); + + mockMvc.perform(get("/faidare/germplasms/sitemap-17.txt") + .contextPath("/faidare")) + .andExpect(status().isNotFound()); + } + + /** Requests the germplasm sitemap of the given index, expects async processing to start, then dispatches the async result and expects status 200, content type text/plain and exactly {@code expectedContent} as body. */ private void testSitemap(int index, String expectedContent) throws Exception { + MvcResult mvcResult = mockMvc.perform(get("/faidare/germplasms/sitemap-" + index + ".txt") + .contextPath("/faidare")) + .andExpect(request().asyncStarted()) + .andReturn(); + + this.mockMvc.perform(asyncDispatch(mvcResult)) + .andExpect(status().isOk()) + .andExpect(content().contentType(MediaType.TEXT_PLAIN)) + .andExpect(content().string(expectedContent)); + + } +} diff --git 
a/backend/src/test/java/fr/inra/urgi/faidare/web/site/SiteControllerTest.java b/backend/src/test/java/fr/inra/urgi/faidare/web/site/SiteControllerTest.java new file mode 100644 index 00000000..a71bfa4c --- /dev/null +++ b/backend/src/test/java/fr/inra/urgi/faidare/web/site/SiteControllerTest.java @@ -0,0 +1,62 @@ +package fr.inra.urgi.faidare.web.site; + +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.when; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.*; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +import java.util.Arrays; +import java.util.List; + +import fr.inra.urgi.faidare.config.FaidareProperties; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; +import fr.inra.urgi.faidare.repository.es.LocationRepository; +import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.MvcResult; +import org.springframework.test.web.servlet.request.MockMvcRequestBuilders; +import org.springframework.test.web.servlet.result.MockMvcResultMatchers; + +/** + * MVC tests for {@link SiteController} + * @author JB Nizet + */ +@WebMvcTest(SiteController.class) +public class SiteControllerTest { + @Autowired + private MockMvc mockMvc; + + @MockBean + private LocationRepository mockLocationRepository; + + @MockBean + private XRefDocumentRepository mockXRefDocumentRepository; + + @MockBean + private 
FaidareProperties mockFaidareProperties; + + /** Stubs scrollAllForSitemap to return an iterator over two sites, requests the sites sitemap, expects async processing to start, then dispatches the async result and expects status 200, content type text/plain and one absolute site URL per line. */ @Test + void shouldGenerateSitemap() throws Exception { + List<LocationSitemapVO> sites = Arrays.asList( + new LocationSitemapVO("site1"), + new LocationSitemapVO("site2") + ); + when(mockLocationRepository.scrollAllForSitemap(anyInt())).thenReturn(sites.iterator()); + MvcResult mvcResult = mockMvc.perform(get("/faidare/sites/sitemap.txt") + .contextPath("/faidare")) + .andExpect(request().asyncStarted()) + .andReturn(); + + this.mockMvc.perform(asyncDispatch(mvcResult)) + .andExpect(status().isOk()) + .andExpect(content().contentType(MediaType.TEXT_PLAIN)) + .andExpect(content().string("http://localhost/faidare/sites/site1\nhttp://localhost/faidare/sites/site2\n")); + } +} diff --git a/backend/src/test/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexControllerTest.java b/backend/src/test/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexControllerTest.java new file mode 100644 index 00000000..ea03832f --- /dev/null +++ b/backend/src/test/java/fr/inra/urgi/faidare/web/sitemap/SitemapIndexControllerTest.java @@ -0,0 +1,34 @@ +package fr.inra.urgi.faidare.web.sitemap; + +import static org.junit.jupiter.api.Assertions.*; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +import fr.inra.urgi.faidare.utils.Sitemaps; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; + +/** + * MVC tests for {@link SitemapIndexController} + * @author JB Nizet + */ +@WebMvcTest(SitemapIndexController.class) +class SitemapIndexControllerTest { + @Autowired + private MockMvc mockMvc; + + /** Requests the sitemap index and checks status 200, content type text/xml, and the loc of the sites entry, of the first two germplasm entries and of the first two study entries (the study entries start at position Sitemaps.BUCKET_COUNT + 2). */ @Test + void shouldGenerateSitemapIndex() throws Exception { + 
mockMvc.perform(get("/faidare/sitemap.xml").contextPath("/faidare")) + .andExpect(status().isOk()) + .andExpect(content().contentType(MediaType.TEXT_XML)) + .andExpect(xpath("/sitemapindex/sitemap[1]/loc").string("http://localhost/faidare/sites/sitemap.txt")) + .andExpect(xpath("/sitemapindex/sitemap[2]/loc").string("http://localhost/faidare/germplasms/sitemap-0.txt")) + .andExpect(xpath("/sitemapindex/sitemap[3]/loc").string("http://localhost/faidare/germplasms/sitemap-1.txt")) + .andExpect(xpath("/sitemapindex/sitemap[" + (Sitemaps.BUCKET_COUNT + 2) + "]/loc").string("http://localhost/faidare/studies/sitemap-0.txt")) + .andExpect(xpath("/sitemapindex/sitemap[" + (Sitemaps.BUCKET_COUNT + 3) + "]/loc").string("http://localhost/faidare/studies/sitemap-1.txt")); + } +} diff --git a/backend/src/test/java/fr/inra/urgi/faidare/web/study/StudyControllerTest.java b/backend/src/test/java/fr/inra/urgi/faidare/web/study/StudyControllerTest.java new file mode 100644 index 00000000..cfa58e32 --- /dev/null +++ b/backend/src/test/java/fr/inra/urgi/faidare/web/study/StudyControllerTest.java @@ -0,0 +1,97 @@ +package fr.inra.urgi.faidare.web.study; + +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.when; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.asyncDispatch; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +import java.util.Arrays; +import java.util.List; + +import fr.inra.urgi.faidare.config.FaidareProperties; +import fr.inra.urgi.faidare.domain.data.LocationSitemapVO; +import fr.inra.urgi.faidare.domain.data.study.StudySitemapVO; +import fr.inra.urgi.faidare.repository.es.GermplasmRepository; +import fr.inra.urgi.faidare.repository.es.LocationRepository; +import fr.inra.urgi.faidare.repository.es.StudyRepository; +import fr.inra.urgi.faidare.repository.es.TrialRepository; 
+import fr.inra.urgi.faidare.repository.es.XRefDocumentRepository; +import fr.inra.urgi.faidare.repository.file.CropOntologyRepository; +import fr.inra.urgi.faidare.utils.Sitemaps; +import fr.inra.urgi.faidare.web.site.SiteController; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.MvcResult; + +/** + * MVC tests for {@link StudyController} + * @author JB Nizet + */ +@WebMvcTest(StudyController.class) +public class StudyControllerTest { + @Autowired + private MockMvc mockMvc; + + @MockBean + private StudyRepository mockStudyRepository; + + @MockBean + private FaidareProperties mockFaidareProperties; + + @MockBean + private XRefDocumentRepository mockXRefDocumentRepository; + + @MockBean + private GermplasmRepository mockGermplasmRepository; + + @MockBean + private CropOntologyRepository mockCropOntologyRepository; + + @MockBean + private TrialRepository mockTrialRepository; + + @MockBean + private LocationRepository mockLocationRepository; + + /** Stubs scrollAllForSitemap to hand out a fresh iterator over four studies on every call, then checks the content served for buckets 6 and 9 (two URLs each), for bucket 7 (empty), and that index 17 answers 404. */ @Test + void shouldGenerateSitemap() throws Exception { + List<StudySitemapVO> studies = Arrays.asList( + new StudySitemapVO("study1"), + new StudySitemapVO("study4"), + new StudySitemapVO("study51"), + new StudySitemapVO("study72") + ); + + // the hashCode algorithm is specified in the javadoc, so it's guaranteed to be + // the same everywhere + // uncomment the following line to see which sitemap index each study has + // studies.forEach(study -> System.out.println(study.getStudyDbId() + " = " + Math.floorMod(study.getStudyDbId().hashCode(), Sitemaps.BUCKET_COUNT))); + + when(mockStudyRepository.scrollAllForSitemap(anyInt())).thenAnswer(invocation -> studies.iterator()); + testSitemap(6, 
"http://localhost/faidare/studies/study1\nhttp://localhost/faidare/studies/study72\n"); + testSitemap(9, "http://localhost/faidare/studies/study4\nhttp://localhost/faidare/studies/study51\n"); + testSitemap(7, ""); + + mockMvc.perform(get("/faidare/studies/sitemap-17.txt") + .contextPath("/faidare")) + .andExpect(status().isNotFound()); + } + + /** Requests the study sitemap of the given index, expects async processing to start, then dispatches the async result and expects status 200, content type text/plain and exactly {@code expectedContent} as body. */ private void testSitemap(int index, String expectedContent) throws Exception { + MvcResult mvcResult = mockMvc.perform(get("/faidare/studies/sitemap-" + index + ".txt") + .contextPath("/faidare")) + .andExpect(request().asyncStarted()) + .andReturn(); + + this.mockMvc.perform(asyncDispatch(mvcResult)) + .andExpect(status().isOk()) + .andExpect(content().contentType(MediaType.TEXT_PLAIN)) + .andExpect(content().string(expectedContent)); + + } +} -- GitLab