Skip to content

Commit

Permalink
add ignore_above support for nrtsearch (#735)
Browse files Browse the repository at this point in the history
* add ignore_above support for nrtsearch

* store binary doc values
  • Loading branch information
taoyyu authored Oct 1, 2024
1 parent 1b3de8e commit d1b5187
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 9 deletions.
3 changes: 3 additions & 0 deletions clientlib/src/main/proto/yelp/nrtsearch/luceneserver.proto
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,9 @@ message Field {
VectorElementType vectorElementType = 34;
// Position increment gap for indexing multi valued TEXT fields. Must be >= 0, defaulting to 100 when not set.
optional int32 positionIncrementGap = 35;
// Maximum string length, in characters, for a value to be indexed. For arrays of strings, ignoreAbove is applied to each array element separately, and elements longer than ignoreAbove will not be indexed or stored.
// This option is also useful for protecting against Lucene’s term byte-length limit of 32766 bytes.
optional int32 ignoreAbove = 36;
}

// Vector field element type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public abstract class TextBaseFieldDef extends IndexableFieldDef

public final Map<IndexReader.CacheKey, GlobalOrdinalLookup> ordinalLookupCache = new HashMap<>();
private final Object ordinalBuilderLock = new Object();
private final int ignoreAbove;

/**
* Field constructor. Uses {@link IndexableFieldDef#IndexableFieldDef(String, Field)} to do common
Expand All @@ -76,6 +77,7 @@ protected TextBaseFieldDef(String name, Field requestField) {
indexAnalyzer = parseIndexAnalyzer(requestField);
searchAnalyzer = parseSearchAnalyzer(requestField);
eagerFieldGlobalOrdinals = requestField.getEagerFieldGlobalOrdinals();
ignoreAbove = requestField.hasIgnoreAbove() ? requestField.getIgnoreAbove() : Integer.MAX_VALUE;
}

@Override
Expand Down Expand Up @@ -236,18 +238,20 @@ public void parseDocumentField(
BytesRef stringBytes = new BytesRef(fieldStr);
if (docValuesType == DocValuesType.BINARY) {
document.add(new BinaryDocValuesField(getName(), stringBytes));
} else if (docValuesType == DocValuesType.SORTED) {
document.add(new SortedDocValuesField(getName(), stringBytes));
} else if (docValuesType == DocValuesType.SORTED_SET) {
document.add(new SortedSetDocValuesField(getName(), stringBytes));
} else {
throw new IllegalArgumentException(
String.format(
"Unsupported doc value type %s for field %s", docValuesType, this.getName()));
} else if (fieldStr.length() <= ignoreAbove) {
if (docValuesType == DocValuesType.SORTED) {
document.add(new SortedDocValuesField(getName(), stringBytes));
} else if (docValuesType == DocValuesType.SORTED_SET) {
document.add(new SortedSetDocValuesField(getName(), stringBytes));
} else {
throw new IllegalArgumentException(
String.format(
"Unsupported doc value type %s for field %s", docValuesType, this.getName()));
}
}
}

if (isStored() || isSearchable()) {
if ((isStored() || isSearchable()) && fieldStr.length() <= ignoreAbove) {
document.add(new FieldWithData(getName(), fieldType, fieldStr));
}

Expand Down
140 changes: 140 additions & 0 deletions src/test/java/com/yelp/nrtsearch/server/grpc/IgnoreAboveTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* Copyright 2022 Yelp Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.yelp.nrtsearch.server.grpc;

import static org.junit.Assert.assertEquals;

import com.yelp.nrtsearch.server.config.IndexStartConfig.IndexDataLocationType;
import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField;
import java.io.IOException;
import java.util.List;
import java.util.stream.Stream;
import org.junit.After;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

public class IgnoreAboveTest {

@Rule public final TemporaryFolder folder = new TemporaryFolder();

private static final List<Field> fields =
List.of(
Field.newBuilder()
.setName("id")
.setType(FieldType._ID)
.setStoreDocValues(true)
.setSearch(true)
.build(),
Field.newBuilder()
.setName("field1")
.setStoreDocValues(true)
.setSearch(true)
.setMultiValued(true)
.setIgnoreAbove(12)
.setType(FieldType.TEXT)
.build());

@After
public void cleanup() {
TestServer.cleanupAll();
}

private void addInitialDoc(TestServer testServer) {
AddDocumentRequest addDocumentRequest =
AddDocumentRequest.newBuilder()
.setIndexName("test_index")
.putFields("id", MultiValuedField.newBuilder().addValue("1").build())
.putFields("field1", MultiValuedField.newBuilder().addValue("first Vendor").build())
.build();
testServer.addDocs(Stream.of(addDocumentRequest));
}

private void addAdditionalDoc(TestServer testServer) {
AddDocumentRequest addDocumentRequest =
AddDocumentRequest.newBuilder()
.setIndexName("test_index")
.putFields("id", MultiValuedField.newBuilder().addValue("2").build())
.putFields(
"field1",
MultiValuedField.newBuilder()
.addValue("second Vendor")
.addValue("new Vendor")
.build())
.build();
testServer.addDocs(Stream.of(addDocumentRequest));
}

private void verifyDocs(TestServer testServer) {
SearchRequest request =
SearchRequest.newBuilder()
.setIndexName("test_index")
.addRetrieveFields("id")
.setStartHit(0)
.setTopHits(10)
.setQuery(
Query.newBuilder()
.setMatchQuery(
MatchQuery.newBuilder().setField("field1").setQuery("first").build())
.build())
.build();
SearchResponse response = testServer.getClient().getBlockingStub().search(request);
assertEquals(1, response.getHitsCount());
request =
SearchRequest.newBuilder()
.setIndexName("test_index")
.addRetrieveFields("id")
.setStartHit(0)
.setTopHits(10)
.setQuery(
Query.newBuilder()
.setMatchQuery(
MatchQuery.newBuilder().setField("field1").setQuery("second").build())
.build())
.build();
response = testServer.getClient().getBlockingStub().search(request);
assertEquals(0, response.getHitsCount());
request =
SearchRequest.newBuilder()
.setIndexName("test_index")
.addRetrieveFields("id")
.setStartHit(0)
.setTopHits(10)
.setQuery(
Query.newBuilder()
.setMatchQuery(
MatchQuery.newBuilder().setField("field1").setQuery("new").build())
.build())
.build();
response = testServer.getClient().getBlockingStub().search(request);
assertEquals(1, response.getHitsCount());
}

@Test
public void testIgnoreAbove() throws IOException {
TestServer primaryServer =
TestServer.builder(folder)
.withAutoStartConfig(true, Mode.PRIMARY, 0, IndexDataLocationType.LOCAL)
.build();
primaryServer.createIndex("test_index");
primaryServer.registerFields("test_index", fields);
primaryServer.startPrimaryIndex("test_index", -1, null);
addInitialDoc(primaryServer);
addAdditionalDoc(primaryServer);
primaryServer.refresh("test_index");
verifyDocs(primaryServer);
}
}

0 comments on commit d1b5187

Please sign in to comment.