Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to lucene 10.1.0 #791

Merged
merged 3 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ aws = "1.12.768"
grpc = "1.66.0"
jackson = "2.17.2"
log4j = "2.23.1"
lucene = "9.12.0"
lucene = "10.1.0"
prometheus = "1.3.1"
protobuf = "3.25.3"

Expand Down Expand Up @@ -59,7 +59,7 @@ grpc-inprocess = { module = "io.grpc:grpc-inprocess", version.ref = "grpc" }
grpc-testing = { module = "io.grpc:grpc-testing", version.ref = "grpc" }
junit = { module = "junit:junit", version = "4.13.2" }
lucene-test-framework = { module = "org.apache.lucene:lucene-test-framework", version.ref = "lucene" }
mockito-core = { module = "org.mockito:mockito-core", version = "5.12.0" }
mockito-core = { module = "org.mockito:mockito-core", version = "5.14.2" }
s3mock = { module = "io.findify:s3mock_2.13", version = "0.2.6" }
spatial4j = { module = "org.locationtech.spatial4j:spatial4j", version = "0.8" }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;

/** Implements per-index {@link Codec}. */
public class ServerCodec extends Lucene912Codec {
public class ServerCodec extends Lucene101Codec {
private final IndexStateManager stateManager;

// nocommit expose compression control
Expand Down
24 changes: 14 additions & 10 deletions src/main/java/com/yelp/nrtsearch/server/doc/LoadedDocValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -1075,20 +1075,22 @@ public ByteVectorType getValue() {
*/
public static final class SingleSearchVector extends LoadedDocValues<FloatVectorType> {
private final FloatVectorValues vectorValues;
private final KnnVectorValues.DocIndexIterator vectorIterator;
private FloatVectorType value = null;

/**
 * Creates a loader backed by the given float vector values.
 *
 * @param vectorValues vector values to read from; may be {@code null}, in which case no iterator
 *     is created and {@code vectorIterator} is left {@code null}
 */
public SingleSearchVector(FloatVectorValues vectorValues) {
  this.vectorValues = vectorValues;
  // The iterator is obtained once here and kept for the lifetime of this instance.
  if (vectorValues == null) {
    this.vectorIterator = null;
  } else {
    this.vectorIterator = vectorValues.iterator();
  }
}

@Override
public void setDocId(int docID) throws IOException {
if (vectorValues != null) {
if (vectorValues.docID() < docID) {
vectorValues.advance(docID);
if (vectorIterator != null) {
if (vectorIterator.docID() < docID) {
vectorIterator.advance(docID);
}
if (vectorValues.docID() == docID) {
value = new FloatVectorType(vectorValues.vectorValue());
if (vectorIterator.docID() == docID) {
value = new FloatVectorType(vectorValues.vectorValue(vectorIterator.index()));
} else {
value = null;
}
Expand Down Expand Up @@ -1133,20 +1135,22 @@ public FloatVectorType getValue() {
*/
public static final class SingleSearchByteVector extends LoadedDocValues<ByteVectorType> {
private final ByteVectorValues vectorValues;
private final KnnVectorValues.DocIndexIterator vectorIterator;
private ByteVectorType value = null;

/**
 * Creates a loader backed by the given byte vector values.
 *
 * @param vectorValues vector values to read from; may be {@code null}, in which case no iterator
 *     is created and {@code vectorIterator} is left {@code null}
 */
public SingleSearchByteVector(ByteVectorValues vectorValues) {
  this.vectorValues = vectorValues;
  // The iterator is obtained once here and kept for the lifetime of this instance.
  if (vectorValues == null) {
    this.vectorIterator = null;
  } else {
    this.vectorIterator = vectorValues.iterator();
  }
}

@Override
public void setDocId(int docID) throws IOException {
if (vectorValues != null) {
if (vectorValues.docID() < docID) {
vectorValues.advance(docID);
if (vectorIterator != null) {
if (vectorIterator.docID() < docID) {
vectorIterator.advance(docID);
}
if (vectorValues.docID() == docID) {
value = new ByteVectorType(vectorValues.vectorValue());
if (vectorIterator.docID() == docID) {
value = new ByteVectorType(vectorValues.vectorValue(vectorIterator.index()));
} else {
value = null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ private static com.yelp.nrtsearch.server.grpc.FacetResult getScriptFacetResult(
int totalDocs = 0;
// run script against all match docs, and aggregate counts
for (MatchingDocs matchingDocs : drillDowns.getMatchingDocs()) {
FacetScript script = segmentFactory.newInstance(matchingDocs.context);
DocIdSetIterator iterator = matchingDocs.bits.iterator();
FacetScript script = segmentFactory.newInstance(matchingDocs.context());
DocIdSetIterator iterator = matchingDocs.bits().iterator();
if (iterator == null) {
continue;
}
Expand All @@ -216,8 +216,8 @@ private static com.yelp.nrtsearch.server.grpc.FacetResult getDocValuesFacetResul
int totalDocs = 0;
// get doc values for all match docs, and aggregate counts
for (MatchingDocs matchingDocs : drillDowns.getMatchingDocs()) {
LoadedDocValues<?> docValues = fieldDef.getDocValues(matchingDocs.context);
DocIdSetIterator iterator = matchingDocs.bits.iterator();
LoadedDocValues<?> docValues = fieldDef.getDocValues(matchingDocs.context());
DocIdSetIterator iterator = matchingDocs.bits().iterator();
if (iterator == null) {
continue;
}
Expand Down Expand Up @@ -462,7 +462,7 @@ private static com.yelp.nrtsearch.server.grpc.FacetResult getFieldFacetResult(
luceneFacets =
new FastTaxonomyFacetCounts(
indexFieldName,
searcherAndTaxonomyManager.taxonomyReader,
searcherAndTaxonomyManager.taxonomyReader(),
indexState.getFacetsConfig(),
c);
} else {
Expand All @@ -478,7 +478,7 @@ private static com.yelp.nrtsearch.server.grpc.FacetResult getFieldFacetResult(
luceneFacets =
new FastTaxonomyFacetCounts(
indexFieldName,
searcherAndTaxonomyManager.taxonomyReader,
searcherAndTaxonomyManager.taxonomyReader(),
indexState.getFacetsConfig(),
drillDowns);
indexFieldNameToFacets.put(indexFieldName, luceneFacets);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public class FilteredSSDVFacetCounts extends Facets {
final String field;
final Map<Long, Integer> globalOrdinalToCountIndex;
final List<String> values;
final int[] counts;
int[] counts;

/**
* Facet to count based on sorted set doc values, but only considering the provided values.
Expand All @@ -76,7 +76,6 @@ public FilteredSSDVFacetCounts(
this.field = state.getField();
this.values = values;
dv = state.getDocValues();
counts = new int[values.size()];

// find mapping to go from global ordinal to the value count index
globalOrdinalToCountIndex = new HashMap<>();
Expand All @@ -98,6 +97,12 @@ public FilteredSSDVFacetCounts(
}
}

/**
 * Allocates the {@code counts} array on first use, with one slot per entry in {@code values}.
 * Does nothing if the array already exists, so existing tallies are kept across calls.
 */
private void initializeCounts() {
  if (counts != null) {
    return;
  }
  counts = new int[values.size()];
}

/** Does all the "real work" of tallying up the counts. */
private void count(List<MatchingDocs> matchingDocs) throws IOException {
OrdinalMap ordinalMap;
Expand All @@ -117,12 +122,12 @@ private void count(List<MatchingDocs> matchingDocs) throws IOException {
// the top-level reader passed to the
// SortedSetDocValuesReaderState, else cryptic
// AIOOBE can happen:
if (ReaderUtil.getTopLevelContext(hits.context).reader() != reader) {
if (ReaderUtil.getTopLevelContext(hits.context()).reader() != reader) {
throw new IllegalStateException(
"the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
}

countOneSegment(ordinalMap, hits.context.reader(), hits.context.ord, hits, null);
countOneSegment(ordinalMap, hits.context().reader(), hits.context().ord, hits, null);
}
}

Expand All @@ -135,6 +140,9 @@ private void countOneSegmentNHLD(OrdinalMap ordinalMap, LeafReader reader, int s
return;
}

// Initialize counts:
initializeCounts();

// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
Expand Down Expand Up @@ -177,7 +185,7 @@ private void countOneSegmentNHLD(OrdinalMap ordinalMap, LeafReader reader, int s
if (count != 0) {
Integer countIndex = globalOrdinalToCountIndex.get(ordMap.get(ord));
if (countIndex != null) {
counts[countIndex]++;
counts[countIndex] += count;
}
}
}
Expand Down Expand Up @@ -212,12 +220,19 @@ private void countOneSegmentNHLD(OrdinalMap ordinalMap, LeafReader reader, int s
private void countOneSegment(
OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits, Bits liveDocs)
throws IOException {
if (hits != null && hits.totalHits() == 0) {
return;
}

SortedSetDocValues multiValues = DocValues.getSortedSet(reader, field);
if (multiValues == null) {
// nothing to count
return;
}

// Initialize counts:
initializeCounts();

// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
Expand All @@ -229,7 +244,7 @@ private void countOneSegment(
it = FacetUtils.liveDocsDISI(valuesIt, liveDocs);
;
} else {
it = ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), valuesIt));
it = ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits().iterator(), valuesIt));
}

// TODO: yet another option is to count all segs
Expand All @@ -246,7 +261,7 @@ private void countOneSegment(

int numSegOrds = (int) multiValues.getValueCount();

if (hits != null && hits.totalHits < numSegOrds / 10) {
if (hits != null && hits.totalHits() < numSegOrds / 10) {
// Remap every ord to global ord as we iterate:
if (singleValues != null) {
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
Expand Down Expand Up @@ -288,7 +303,7 @@ private void countOneSegment(
if (count != 0) {
Integer countIndex = globalOrdinalToCountIndex.get(ordMap.get(ord));
if (countIndex != null) {
counts[countIndex]++;
counts[countIndex] += count;
}
}
}
Expand Down Expand Up @@ -337,8 +352,7 @@ private void countAll() throws IOException {
if (liveDocs == null) {
countOneSegmentNHLD(ordinalMap, context.reader(), context.ord);
} else {
countOneSegment(
ordinalMap, context.reader(), context.ord, null, context.reader().getLiveDocs());
countOneSegment(ordinalMap, context.reader(), context.ord, null, liveDocs);
}
}
}
Expand Down Expand Up @@ -366,14 +380,14 @@ private FacetResult getDim(String dim, int topN) throws IOException {
int dimCount = 0;
int childCount = 0;

TopOrdAndIntQueue.OrdAndValue reuse = null;
TopOrdAndIntQueue.OrdAndInt reuse = null;
for (int ord = 0; ord < counts.length; ord++) {
if (counts[ord] > 0) {
dimCount += counts[ord];
childCount++;
if (counts[ord] > bottomCount) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
reuse = new TopOrdAndIntQueue.OrdAndInt();
}
reuse.ord = ord;
reuse.value = counts[ord];
Expand All @@ -382,9 +396,9 @@ private FacetResult getDim(String dim, int topN) throws IOException {
// sparse case unnecessarily
q = new TopOrdAndIntQueue(topN);
}
reuse = q.insertWithOverflow(reuse);
reuse = (TopOrdAndIntQueue.OrdAndInt) q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
bottomCount = ((TopOrdAndIntQueue.OrdAndInt) q.top()).value;
}
}
}
Expand All @@ -396,7 +410,7 @@ private FacetResult getDim(String dim, int topN) throws IOException {

LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for (int i = labelValues.length - 1; i >= 0; i--) {
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
TopOrdAndIntQueue.OrdAndInt ordAndValue = (TopOrdAndIntQueue.OrdAndInt) q.pop();
labelValues[i] = new LabelAndValue(values.get(ordAndValue.ord), ordAndValue.value);
}
return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.suggest.document.Completion912PostingsFormat;
import org.apache.lucene.search.suggest.document.Completion101PostingsFormat;
import org.apache.lucene.search.suggest.document.ContextSuggestField;

public class ContextSuggestFieldDef extends IndexableFieldDef<Void> {
Expand All @@ -45,7 +45,7 @@ protected ContextSuggestFieldDef(
this.indexAnalyzer = this.parseIndexAnalyzer(requestField);
this.searchAnalyzer = this.parseSearchAnalyzer(requestField);
this.postingsFormat =
new Completion912PostingsFormat(context.config().getCompletionCodecLoadMode());
new Completion101PostingsFormat(context.config().getCompletionCodecLoadMode());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ private CreateSnapshotResponse createSnapshot(
// search is done:
long t0 = System.nanoTime();
IndexReader r =
DirectoryReader.openIfChanged((DirectoryReader) s2.searcher.getIndexReader(), c);
DirectoryReader.openIfChanged((DirectoryReader) s2.searcher().getIndexReader(), c);
IndexSearcher s = new IndexSearcher(r);
try {
shardState.slm.record(s);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,22 +88,22 @@ private CopyState handle(IndexState indexState, CopyStateRequest copyStateReques
private static CopyState writeCopyState(org.apache.lucene.replicator.nrt.CopyState state)
throws IOException {
CopyState.Builder builder = CopyState.newBuilder();
builder.setInfoBytesLength(state.infosBytes.length);
builder.setInfoBytes(ByteString.copyFrom(state.infosBytes, 0, state.infosBytes.length));
builder.setInfoBytesLength(state.infosBytes().length);
builder.setInfoBytes(ByteString.copyFrom(state.infosBytes(), 0, state.infosBytes().length));

builder.setGen(state.gen);
builder.setVersion(state.version);
builder.setGen(state.gen());
builder.setVersion(state.version());

FilesMetadata filesMetadata = writeFilesMetaData(state.files);
FilesMetadata filesMetadata = writeFilesMetaData(state.files());
builder.setFilesMetadata(filesMetadata);

builder.setCompletedMergeFilesSize(state.completedMergeFiles.size());
builder.setCompletedMergeFilesSize(state.completedMergeFiles().size());

for (String fileName : state.completedMergeFiles) {
for (String fileName : state.completedMergeFiles()) {
builder.addCompletedMergeFiles(fileName);
}

builder.setPrimaryGen(state.primaryGen);
builder.setPrimaryGen(state.primaryGen());

return builder.build();
}
Expand All @@ -117,12 +117,12 @@ public static FilesMetadata writeFilesMetaData(Map<String, FileMetaData> files)
fileMetadataBuilder.setFileName(ent.getKey());

FileMetaData fmd = ent.getValue();
fileMetadataBuilder.setLen(fmd.length);
fileMetadataBuilder.setChecksum(fmd.checksum);
fileMetadataBuilder.setHeaderLength(fmd.header.length);
fileMetadataBuilder.setHeader(ByteString.copyFrom(fmd.header, 0, fmd.header.length));
fileMetadataBuilder.setFooterLength(fmd.footer.length);
fileMetadataBuilder.setFooter(ByteString.copyFrom(fmd.footer, 0, fmd.footer.length));
fileMetadataBuilder.setLen(fmd.length());
fileMetadataBuilder.setChecksum(fmd.checksum());
fileMetadataBuilder.setHeaderLength(fmd.header().length);
fileMetadataBuilder.setHeader(ByteString.copyFrom(fmd.header(), 0, fmd.header().length));
fileMetadataBuilder.setFooterLength(fmd.footer().length);
fileMetadataBuilder.setFooter(ByteString.copyFrom(fmd.footer(), 0, fmd.footer().length));
builder.addFileMetadata(fileMetadataBuilder.build());
}
return builder.build();
Expand Down
Loading
Loading