Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK-7134 - Re-implement Aggregations Stats for all Catalog Browsers #100

Open
wants to merge 36 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
18c46e5
datastore: add facet support in mongodb datastore, #TASK-7151, #TASK-…
jtarraga Oct 24, 2024
5495d80
datastore: improve code, #TASK-7151, #TASK-7134
jtarraga Oct 25, 2024
304603e
datastore: implement the MongoDB to FacetField converter, #TASK-7151,…
jtarraga Oct 25, 2024
b537e6f
datastore: fix MongoDB document to FacetField converter, #TASK-7151, …
jtarraga Oct 25, 2024
17f83b2
datastore: change long to Long in FacetField, #TASK-7151, #TASK-7134
jtarraga Oct 28, 2024
865b94a
datastore: set range format to field[start..end]:step, #TASK-7151, #T…
jtarraga Oct 28, 2024
880f2c6
datastore: use JsonInclude.Include.NON_NULL, #TASK-7151, #TASK-7134
jtarraga Oct 28, 2024
75dc002
datastore: fix pom.xml, #TASK-7151, #TASK-7134
jtarraga Oct 28, 2024
25cbd91
datastore: restore FacetField to previous change, #TASK-7151, #TASK-7134
jtarraga Oct 28, 2024
9f0d9b9
datastore: change count to Number, #TASK-7151, #TASK-7134
jtarraga Oct 29, 2024
0f3a24d
test: add JUnit tests for facets, #TASK-7151, #TASK-7134
jtarraga Oct 29, 2024
f2b080c
mongodb: rename converter, use Long instead Number, #TASK-7151, #TASL…
jtarraga Nov 11, 2024
ea3906c
mongodb: support lists using accumulators, #TASK-7151, #TASK-7134
jtarraga Nov 12, 2024
e68c30e
mongodb: fix sonnar issues, #TASK-7151, #TASK-7134
jtarraga Nov 13, 2024
84d1f92
mondodb: add 'sum' to aggregation operators enum
imedina Dec 12, 2024
26c9628
mondodb: fix 'sum' aggregation operator
imedina Dec 12, 2024
10a7f0c
mondodb: fix 'sum' aggregation operator
imedina Dec 12, 2024
7943e1b
mondodb: fix 'sum' aggregation operator
imedina Dec 12, 2024
e8159f3
mondodb: fix check style
imedina Dec 12, 2024
005c45e
datastore: fix the accumulator 'sum' in MongoDB facets, #TASK-7151, #…
jtarraga Dec 12, 2024
31424d8
mongodb: aggregation test. To be reverted.
imedina Dec 13, 2024
57f2138
mongodb: aggregation test 2. To be reverted.
imedina Dec 13, 2024
3cca26f
mongodb: aggregation test 3. To be reverted.
imedina Dec 13, 2024
e177dd7
mongodb: aggregation test 4. To be reverted.
imedina Dec 13, 2024
b15ed9a
mongodb: revert all tests
imedina Dec 13, 2024
a7c86e0
mongodb: fix aggregation regex
imedina Dec 13, 2024
eb5b519
mongodb: aggregation style improvement
imedina Dec 13, 2024
a073e84
mongodb: fix aggregation regex
imedina Dec 13, 2024
3f9386f
mongodb: fix aggregation
imedina Dec 13, 2024
13b3e59
mongodb: fix aggregation parse
imedina Dec 13, 2024
dd39812
datastore: implement the facet following the example:bioformat:sum(si…
jtarraga Dec 13, 2024
de98cda
Merge branch 'TASK-7134' of https://github.com/opencb/java-common-lib…
jtarraga Dec 13, 2024
ac66d66
datastore: fix facet 'format:count(size)' to behaviour as 'count(form…
jtarraga Dec 14, 2024
421d5ce
datastore: improve MongoDB facets for arrays by using unwind, #TASK-7…
jtarraga Jan 16, 2025
1184be3
datastore: fix MongoDB facet parser, #TASK-7151, #TASK-7134
jtarraga Jan 17, 2025
7255b42
datastore: fix the converter by replacing '&#46;' by '.' in the facet…
jtarraga Jan 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@
/**
* Created by jtarraga on 09/03/17.
*/

public class FacetField {
private String name;
private long count;
private Long count;
private List<Bucket> buckets;
private String aggregationName;
private List<Double> aggregationValues;
Expand All @@ -38,6 +37,13 @@ public FacetField(String name, long count, List<Bucket> buckets) {
this.buckets = buckets;
}

public FacetField(String name, long count, String aggregationName, List<Double> aggregationValues) {
this.name = name;
this.count = count;
this.aggregationName = aggregationName;
this.aggregationValues = aggregationValues;
}

public FacetField(String name, String aggregationName, List<Double> aggregationValues) {
this.name = name;
this.aggregationName = aggregationName;
Expand Down Expand Up @@ -68,17 +74,20 @@ public FacetField setName(String name) {
return this;
}

public long getCount() {
public Long getCount() {
return count;
}

public FacetField setCount(long count) {
public FacetField setCount(Long count) {
this.count = count;
return this;
}

public FacetField addCount(long delta) {
this.count += delta;
if (this.count == null) {
this.count = 0L;
}
this.count = this.count.longValue() + delta;
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,7 @@ private <T> DataResult<T> endQuery(List result, long numMatches, double start) {
long end = System.currentTimeMillis();
int numResults = (result != null) ? result.size() : 0;

DataResult<T> queryResult = new DataResult((int) (end - start), Collections.emptyList(), numResults, result, numMatches, null);
return queryResult;
return new DataResult((int) (end - start), Collections.emptyList(), numResults, result, numMatches, null);
}

private DataResult endWrite(long start) {
Expand Down Expand Up @@ -331,31 +330,25 @@ public <T> DataResult<T> aggregate(List<? extends Bson> operations, ComplexTypeC
QueryOptions options) {

long start = startQuery();

DataResult<T> queryResult;
MongoDBIterator<T> iterator = mongoDBNativeQuery.aggregate(operations, converter, options);
// MongoCursor<Document> iterator = output.iterator();
List<T> list = new LinkedList<>();
if (queryResultWriter != null) {
try {
queryResultWriter.open();
if (operations != null && !operations.isEmpty()) {
MongoDBIterator<T> iterator = mongoDBNativeQuery.aggregate(operations, converter, options);
if (queryResultWriter != null) {
try {
queryResultWriter.open();
while (iterator.hasNext()) {
queryResultWriter.write(iterator.next());
}
queryResultWriter.close();
} catch (IOException e) {
throw new RuntimeException(e.getMessage(), e);
}
} else {
while (iterator.hasNext()) {
queryResultWriter.write(iterator.next());
list.add(iterator.next());
}
queryResultWriter.close();
} catch (IOException e) {
throw new RuntimeException(e.getMessage(), e);
}
} else {
// if (converter != null) {
// while (iterator.hasNext()) {
// list.add(converter.convertToDataModelType(iterator.next()));
// }
// } else {
while (iterator.hasNext()) {
list.add((T) iterator.next());
}
// }
}
queryResult = endQuery(list, start);
return queryResult;
Expand Down Expand Up @@ -435,7 +428,7 @@ public DataResult update(ClientSession clientSession, List<? extends Bson> queri

return endWrite(
wr.getMatchedCount(),
wr.getInsertedCount() + wr.getUpserts().size(),
(long) wr.getInsertedCount() + wr.getUpserts().size(),
wr.getModifiedCount(),
wr.getDeletedCount(),
0,
Expand Down Expand Up @@ -553,8 +546,7 @@ public DataResult createIndex(Bson keys, ObjectMap options) {
}

mongoDBNativeQuery.createIndex(keys, i);
DataResult dataResult = endQuery(Collections.emptyList(), start);
return dataResult;
return endQuery(Collections.emptyList(), start);
}

public void dropIndexes() {
Expand All @@ -564,15 +556,13 @@ public void dropIndexes() {
public DataResult dropIndex(Bson keys) {
long start = startQuery();
mongoDBNativeQuery.dropIndex(keys);
DataResult dataResult = endQuery(Collections.emptyList(), start);
return dataResult;
return endQuery(Collections.emptyList(), start);
}

public DataResult<Document> getIndex() {
long start = startQuery();
List<Document> index = mongoDBNativeQuery.getIndex();
DataResult<Document> queryResult = endQuery(index, start);
return queryResult;
return endQuery(index, start);
}


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
package org.opencb.commons.datastore.mongodb;

import org.apache.commons.lang3.StringUtils;
import org.bson.Document;
import org.opencb.commons.datastore.core.ComplexTypeConverter;
import org.opencb.commons.datastore.core.FacetField;

import java.util.*;

import static org.opencb.commons.datastore.mongodb.GenericDocumentComplexConverter.TO_REPLACE_DOTS;
import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*;
import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*;

/**
 * Converts the single result document of a MongoDB facet aggregation into a list of {@link FacetField}.
 *
 * <p>Each entry of the input document is expected to hold a list of sub-documents. The entry key selects how
 * that list is interpreted:
 * <ul>
 *   <li>keys ending with {@code COUNTS_SUFFIX} or {@code FACET_ACC_SUFFIX}: one bucket per sub-document;</li>
 *   <li>keys ending with {@code RANGES_SUFFIX}: one range facet whose values are the per-range counts;</li>
 *   <li>any other key: a single accumulator result (sum, avg, max, min, stdDevPop, stdDevSamp) taken from the
 *       first sub-document.</li>
 * </ul>
 * Field names have {@code TO_REPLACE_DOTS} replaced by {@code "."} before being exposed.
 */
public class MongoDBDocumentToFacetFieldsConverter implements ComplexTypeConverter<List<FacetField>, Document> {

    /**
     * Builds the facet fields described by the given facet result document.
     *
     * @param document Facet result document; may be null or empty.
     * @return One facet field per convertible entry, in entry order; an empty list for a null or empty document.
     * @throws IllegalArgumentException if an accumulator entry contains no known accumulator key.
     */
    @Override
    public List<FacetField> convertToDataModelType(Document document) {
        if (document == null || document.isEmpty()) {
            return Collections.emptyList();
        }

        List<FacetField> facets = new ArrayList<>();
        for (Map.Entry<String, Object> entry : document.entrySet()) {
            String key = entry.getKey();
            // Every facet entry is read as a list of documents, exactly as the original implementation did.
            @SuppressWarnings("unchecked")
            List<Document> documentValues = (List<Document>) entry.getValue();

            if (key.endsWith(COUNTS_SUFFIX) || key.endsWith(FACET_ACC_SUFFIX)) {
                facets.add(toBucketFacet(key, documentValues));
            } else if (key.endsWith(RANGES_SUFFIX)) {
                facets.add(toRangeFacet(key, documentValues));
            } else {
                FacetField accumulatorFacet = toAccumulatorFacet(documentValues);
                if (accumulatorFacet != null) {
                    facets.add(accumulatorFacet);
                }
            }
        }
        return facets;
    }

    /**
     * Converts a counts facet (optionally carrying one accumulator per bucket) into a bucketed facet field.
     *
     * <p>For accumulator keys, the key is split by {@code SEPARATOR}: part 0 is the facet field name, part 1 the
     * aggregation name and part 2 the name of the aggregated field.
     */
    private static FacetField toBucketFacet(String key, List<Document> bucketDocuments) {
        String[] keyParts = key.split(SEPARATOR);
        boolean withAccumulator = key.endsWith(FACET_ACC_SUFFIX);

        List<FacetField.Bucket> buckets = new ArrayList<>(bucketDocuments.size());
        long total = 0;
        for (Document bucketDocument : bucketDocuments) {
            long counter = toLong(bucketDocument.get(count.name()));
            String bucketValue = toBucketValue(bucketDocument.get(INTERNAL_ID));

            List<FacetField> bucketFacetFields = null;
            if (withAccumulator) {
                String aggregationName = keyParts[1];
                String aggregatedFieldName = keyParts[2].replace(TO_REPLACE_DOTS, ".");
                List<Double> aggregationValues = Collections.singletonList(toDouble(bucketDocument.get(aggregationName)));
                FacetField facetField = new FacetField(aggregatedFieldName, aggregationName, aggregationValues);
                // Perhaps it is redundant, as it is also set in the bucket
                facetField.setCount(counter);
                bucketFacetFields = Collections.singletonList(facetField);
            }

            buckets.add(new FacetField.Bucket(bucketValue, counter, bucketFacetFields));
            total += counter;
        }
        return new FacetField(keyParts[0].replace(TO_REPLACE_DOTS, "."), total, buckets);
    }

    /**
     * Converts a ranges facet into a facet field with aggregation name {@code "range"}.
     *
     * <p>Start is the first range boundary seen, end is the last one, and step is the difference between the
     * first two distinct boundaries. The sub-document whose id equals {@code OTHER} is not a range: its count is
     * appended to the facet name as the number of values out of range.
     */
    private static FacetField toRangeFacet(String key, List<Document> rangeDocuments) {
        List<Double> rangeCounts = new ArrayList<>(rangeDocuments.size());
        Number start = null;
        Number end = null;
        Number step = null;
        Double other = null;
        for (Document rangeDocument : rangeDocuments) {
            Object internalId = rangeDocument.get(INTERNAL_ID);
            if (internalId instanceof String && OTHER.equals(internalId)) {
                other = (double) toLong(rangeDocument.get(count.name()));
            } else {
                // Boundaries may come back as any numeric type, not only Double
                Double boundary = toDouble(internalId);
                rangeCounts.add((double) toLong(rangeDocument.get(count.name())));
                if (start == null) {
                    start = boundary;
                }
                end = boundary;
                // Compare by value: '!=' on boxed numbers only compares references
                if (step == null && start != null && end != null && !start.equals(end)) {
                    step = end.doubleValue() - start.doubleValue();
                }
            }
        }

        String facetFieldName = key.split(SEPARATOR)[0].replace(TO_REPLACE_DOTS, ".");
        if (other != null) {
            facetFieldName += " (counts out of range: " + other + ")";
        }
        return new FacetField(facetFieldName, "range", rangeCounts)
                .setStart(start)
                .setEnd(end)
                .setStep(step);
    }

    /**
     * Converts an accumulator facet, reading only the first sub-document.
     *
     * @return The facet field, or null when the list is empty or the accumulator found is not one of
     *         sum, avg, max, min, stdDevPop or stdDevSamp.
     */
    private FacetField toAccumulatorFacet(List<Document> accumulatorDocuments) {
        if (accumulatorDocuments.isEmpty()) {
            // Nothing was aggregated for this facet, so there is nothing to report
            return null;
        }

        Document accumulatorDocument = accumulatorDocuments.get(0);
        MongoDBQueryUtils.Accumulator accumulator = getAccumulator(accumulatorDocument);
        switch (accumulator) {
            case sum:
            case avg:
            case max:
            case min:
            case stdDevPop:
            case stdDevSamp: {
                Object rawValue = accumulatorDocument.get(accumulator.name());
                List<Double> fieldValues = new ArrayList<>();
                if (rawValue instanceof List) {
                    for (Object item : (List<?>) rawValue) {
                        fieldValues.add(toDouble(item));
                    }
                } else {
                    fieldValues.add(toDouble(rawValue));
                }
                long total = toLong(accumulatorDocument.get("count"));
                String facetFieldName = accumulatorDocument.getString(INTERNAL_ID).replace(TO_REPLACE_DOTS, ".");
                return new FacetField(facetFieldName, total, accumulator.name(), fieldValues);
            }
            default: {
                // Any other accumulator (e.g. count) does not produce a facet field here
                return null;
            }
        }
    }

    /**
     * Renders a bucket id as text: strings as is, booleans and numbers through {@code toString()}, documents as
     * their values joined by {@code AND_SEPARATOR}, and anything else (including null) as an empty string.
     */
    private static String toBucketValue(Object internalId) {
        if (internalId instanceof String) {
            return (String) internalId;
        }
        if (internalId instanceof Boolean || internalId instanceof Number) {
            return internalId.toString();
        }
        if (internalId instanceof Document) {
            return StringUtils.join(((Document) internalId).values(), AND_SEPARATOR);
        }
        return "";
    }

    /**
     * Reads a numeric value as a double.
     *
     * @return The value as Double, or null when the value is null.
     * @throws ClassCastException if the value is neither null nor a {@link Number}.
     */
    private static Double toDouble(Object value) {
        if (value == null) {
            return null;
        }
        return ((Number) value).doubleValue();
    }

    /**
     * Reads a counter regardless of the numeric type it was stored with.
     *
     * @return The value as long, or 0 when the value is null (missing counter).
     * @throws ClassCastException if the value is neither null nor a {@link Number}.
     */
    private static long toLong(Object value) {
        if (value == null) {
            return 0L;
        }
        return ((Number) value).longValue();
    }

    /**
     * Finds the first key of the document that is the name of an {@link MongoDBQueryUtils.Accumulator}.
     *
     * @param document Facet sub-document to inspect.
     * @return The accumulator matching the first recognised key.
     * @throws IllegalArgumentException if no key of the document names an accumulator.
     */
    private MongoDBQueryUtils.Accumulator getAccumulator(Document document) {
        for (String key : document.keySet()) {
            try {
                return MongoDBQueryUtils.Accumulator.valueOf(key);
            } catch (IllegalArgumentException ignored) {
                // This key is not an accumulator name, keep looking
            }
        }
        throw new IllegalArgumentException("No accumulators found in facet document: " + StringUtils.join(document.keySet(), ", ")
                + ". Valid accumulator functions: " + StringUtils.join(Arrays.asList(count, sum, max, min, avg, stdDevPop, stdDevSamp), ","));
    }

    /**
     * Not supported: facet fields are only read from MongoDB, never written back.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Document convertToStorageType(List<FacetField> facetFields) {
        throw new UnsupportedOperationException("Not yet implemented");
    }
}
Loading
Loading