Skip to content

Commit

Permalink
backport Yelp#735 and Yelp#732 to v0.x branch
Browse files Browse the repository at this point in the history
  • Loading branch information
taoyyu committed Oct 1, 2024
1 parent 7ef3630 commit c3243df
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 15 deletions.
3 changes: 3 additions & 0 deletions clientlib/src/main/proto/yelp/nrtsearch/luceneserver.proto
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,9 @@ message Field {
int32 vectorDimensions = 29; // Dimensions for vector field type
// If field based global ordinals should be built up front, otherwise this is done lazily on first access. Currently only for fields with text doc values (TEXT/ATOM).
bool eagerFieldGlobalOrdinals = 30;
// For arrays of strings, ignoreAbove will be applied to each array element separately, and string elements longer than ignoreAbove will not be indexed or stored.
// This option is also useful for protecting against Lucene’s term byte-length limit of 32766.
// Note that ignoreAbove is compared against the string length in characters, whereas the Lucene limit is in bytes.
optional int32 ignoreAbove = 36;
}

/* Input to registerFields */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.util.BytesRef;

/** Field class for 'ATOM' field type. Uses {@link KeywordAnalyzer} for text analysis. */
public class AtomFieldDef extends TextBaseFieldDef implements Sortable {
Expand Down Expand Up @@ -86,7 +87,7 @@ protected DocValuesType parseDocValuesType(Field requestField) {

@Override
public Object parseLastValue(String value) {
return value;
return new BytesRef(value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public abstract class TextBaseFieldDef extends IndexableFieldDef

public final Map<IndexReader.CacheKey, GlobalOrdinalLookup> ordinalLookupCache = new HashMap<>();
private final Object ordinalBuilderLock = new Object();
private final int ignoreAbove;

/**
* Field constructor. Uses {@link IndexableFieldDef#IndexableFieldDef(String, Field)} to do common
Expand All @@ -82,6 +83,7 @@ protected TextBaseFieldDef(String name, Field requestField) {
indexAnalyzer = parseIndexAnalyzer(requestField);
searchAnalyzer = parseSearchAnalyzer(requestField);
eagerFieldGlobalOrdinals = requestField.getEagerFieldGlobalOrdinals();
ignoreAbove = requestField.hasIgnoreAbove() ? requestField.getIgnoreAbove() : Integer.MAX_VALUE;
}

@Override
Expand Down Expand Up @@ -277,18 +279,20 @@ public void parseDocumentField(
BytesRef stringBytes = new BytesRef(fieldStr);
if (docValuesType == DocValuesType.BINARY) {
document.add(new BinaryDocValuesField(getName(), stringBytes));
} else if (docValuesType == DocValuesType.SORTED) {
document.add(new SortedDocValuesField(getName(), stringBytes));
} else if (docValuesType == DocValuesType.SORTED_SET) {
document.add(new SortedSetDocValuesField(getName(), stringBytes));
} else {
throw new IllegalArgumentException(
String.format(
"Unsupported doc value type %s for field %s", docValuesType, this.getName()));
} else if (fieldStr.length() <= ignoreAbove) {
if (docValuesType == DocValuesType.SORTED) {
document.add(new SortedDocValuesField(getName(), stringBytes));
} else if (docValuesType == DocValuesType.SORTED_SET) {
document.add(new SortedSetDocValuesField(getName(), stringBytes));
} else {
throw new IllegalArgumentException(
String.format(
"Unsupported doc value type %s for field %s", docValuesType, this.getName()));
}
}
}

if (isStored() || isSearchable()) {
if ((isStored() || isSearchable()) && fieldStr.length() <= ignoreAbove) {
document.add(new FieldWithData(getName(), fieldType, fieldStr));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -89,8 +90,14 @@ public void fillLastHit(SearchResponse.SearchState.Builder stateBuilder, ScoreDo
LastHitInfo.Builder lastHitBuilder = LastHitInfo.newBuilder();
lastHitBuilder.setLastDocId(lastHit.doc);
for (Object fv : fd.fields) {
stateBuilder.addLastFieldValues(fv.toString());
lastHitBuilder.addLastFieldValues(fv.toString());
String fvstr;
if (fv instanceof BytesRef) {
fvstr = ((BytesRef) fv).utf8ToString();
} else {
fvstr = fv.toString();
}
stateBuilder.addLastFieldValues(fvstr);
lastHitBuilder.addLastFieldValues(fvstr);
}
stateBuilder.setLastHitInfo(lastHitBuilder.build());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;

/**
* Class to handle creation of a {@link Sort} used to sort documents by field values for queries.
Expand Down Expand Up @@ -195,7 +196,11 @@ private static SearchResponse.Hit.CompositeFieldValue getValueForSortField(
break;
case STRING:
case STRING_VAL:
fieldValue.setTextValue((String) sortValue);
if (sortValue instanceof BytesRef) {
fieldValue.setTextValue(((BytesRef) sortValue).utf8ToString());
} else {
fieldValue.setTextValue((String) sortValue);
}
break;
case CUSTOM:
// could be anything, try to determine from value class
Expand Down
141 changes: 141 additions & 0 deletions src/test/java/com/yelp/nrtsearch/server/grpc/IgnoreAboveTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/*
* Copyright 2022 Yelp Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.yelp.nrtsearch.server.grpc;

import static org.junit.Assert.assertEquals;

import com.yelp.nrtsearch.server.config.IndexStartConfig.IndexDataLocationType;
import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField;
import java.io.IOException;
import java.util.List;
import java.util.stream.Stream;
import org.junit.After;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

public class IgnoreAboveTest {

@Rule public final TemporaryFolder folder = new TemporaryFolder();

private static final List<Field> fields =
List.of(
Field.newBuilder()
.setName("id")
.setType(FieldType._ID)
.setStoreDocValues(true)
.setSearch(true)
.build(),
Field.newBuilder()
.setName("field1")
.setStoreDocValues(true)
.setSearch(true)
.setMultiValued(true)
.setIgnoreAbove(12)
.setTokenize(true)
.setType(FieldType.TEXT)
.build());

@After
public void cleanup() {
TestServer.cleanupAll();
}

private void addInitialDoc(TestServer testServer) {
AddDocumentRequest addDocumentRequest =
AddDocumentRequest.newBuilder()
.setIndexName("test_index")
.putFields("id", MultiValuedField.newBuilder().addValue("1").build())
.putFields("field1", MultiValuedField.newBuilder().addValue("first Vendor").build())
.build();
testServer.addDocs(Stream.of(addDocumentRequest));
}

private void addAdditionalDoc(TestServer testServer) {
AddDocumentRequest addDocumentRequest =
AddDocumentRequest.newBuilder()
.setIndexName("test_index")
.putFields("id", MultiValuedField.newBuilder().addValue("2").build())
.putFields(
"field1",
MultiValuedField.newBuilder()
.addValue("second Vendor")
.addValue("new Vendor")
.build())
.build();
testServer.addDocs(Stream.of(addDocumentRequest));
}

private void verifyDocs(TestServer testServer) {
SearchRequest request =
SearchRequest.newBuilder()
.setIndexName("test_index")
.addRetrieveFields("id")
.setStartHit(0)
.setTopHits(10)
.setQuery(
Query.newBuilder()
.setMatchQuery(
MatchQuery.newBuilder().setField("field1").setQuery("first").build())
.build())
.build();
SearchResponse response = testServer.getClient().getBlockingStub().search(request);
assertEquals(1, response.getHitsCount());
request =
SearchRequest.newBuilder()
.setIndexName("test_index")
.addRetrieveFields("id")
.setStartHit(0)
.setTopHits(10)
.setQuery(
Query.newBuilder()
.setMatchQuery(
MatchQuery.newBuilder().setField("field1").setQuery("second").build())
.build())
.build();
response = testServer.getClient().getBlockingStub().search(request);
assertEquals(0, response.getHitsCount());
request =
SearchRequest.newBuilder()
.setIndexName("test_index")
.addRetrieveFields("id")
.setStartHit(0)
.setTopHits(10)
.setQuery(
Query.newBuilder()
.setMatchQuery(
MatchQuery.newBuilder().setField("field1").setQuery("new").build())
.build())
.build();
response = testServer.getClient().getBlockingStub().search(request);
assertEquals(1, response.getHitsCount());
}

@Test
public void testIgnoreAbove() throws IOException {
TestServer primaryServer =
TestServer.builder(folder)
.withAutoStartConfig(true, Mode.PRIMARY, 0, IndexDataLocationType.LOCAL)
.build();
primaryServer.createIndex("test_index");
primaryServer.registerFields("test_index", fields);
primaryServer.startPrimaryIndex("test_index", -1, null);
addInitialDoc(primaryServer);
addAdditionalDoc(primaryServer);
primaryServer.refresh("test_index");
verifyDocs(primaryServer);
}
}
61 changes: 61 additions & 0 deletions src/test/java/com/yelp/nrtsearch/server/grpc/SortFieldTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,38 @@ public void testReverseSortQueryVirtualField() {
}
}

/**
 * Sorts by the {@code doc_id} field and checks that the five returned hits carry the ids
 * "0", "1", "10", "11", "12", in that order.
 */
@Test
public void testSortAtomDocId() {
  SortType docIdSort = SortType.newBuilder().setFieldName("doc_id").build();
  SortFields sortFields = SortFields.newBuilder().addSortedFields(docIdSort).build();
  QuerySortField querySortField = QuerySortField.newBuilder().setFields(sortFields).build();

  SearchResponse searchResponse = doSortQuery(querySortField);

  assertEquals(5, searchResponse.getHitsCount());
  // The expected order is consistent with string ordering of the ids rather than numeric ordering.
  List<String> expectedIds = Arrays.asList("0", "1", "10", "11", "12");
  assertFields(expectedIds, searchResponse.getHitsList());
}

/**
 * Sorts by the {@code doc_id} field while resuming after a last-hit sort value of "1", and checks
 * that the five returned hits carry the ids "1", "10", "11", "12", "13", in that order.
 */
@Test
public void testSortAtomDocIdSearchAfter() {
  SortType docIdSort = SortType.newBuilder().setFieldName("doc_id").build();
  SortFields sortFields = SortFields.newBuilder().addSortedFields(docIdSort).build();
  QuerySortField querySortField = QuerySortField.newBuilder().setFields(sortFields).build();

  // Only the last sort value is supplied; no last doc id is set on the LastHitInfo.
  // NOTE(review): the hit with id "1" is still expected in the results, presumably because of the
  // unset last doc id — confirm against the search-after handling.
  LastHitInfo.Builder searchAfterBuilder = LastHitInfo.newBuilder();
  searchAfterBuilder.addLastFieldValues("1");
  LastHitInfo searchAfter = searchAfterBuilder.build();

  SearchResponse searchResponse = dosSortQuerySearchAfter(querySortField, searchAfter);

  assertEquals(5, searchResponse.getHitsCount());
  List<String> expectedIds = Arrays.asList("1", "10", "11", "12", "13");
  assertFields(expectedIds, searchResponse.getHitsList());
}

@Test
public void testSortDocId() {
QuerySortField querySortField =
Expand Down Expand Up @@ -783,6 +815,35 @@ public void testSortLanLonDistanceInInnerHit() {
}
}

/**
 * Sorts by {@code int_field} while resuming after a last-hit sort value of "1", and checks the
 * ids, sort values, score and field count of the five returned hits.
 */
@Test
public void testSortIntFieldWithSearchAfter() {
  SortType intFieldSort = SortType.newBuilder().setFieldName("int_field").build();
  SortFields sortFields = SortFields.newBuilder().addSortedFields(intFieldSort).build();
  QuerySortField querySortField = QuerySortField.newBuilder().setFields(sortFields).build();

  LastHitInfo.Builder searchAfterBuilder = LastHitInfo.newBuilder();
  searchAfterBuilder.addLastFieldValues("1");
  LastHitInfo searchAfter = searchAfterBuilder.build();

  SearchResponse searchResponse = dosSortQuerySearchAfter(querySortField, searchAfter);
  assertEquals(5, searchResponse.getHitsCount());

  List<String> expectedIds = Arrays.asList("91", "92", "93", "94", "95");
  assertFields(expectedIds, searchResponse.getHitsList());

  // Expected int_field sort value for each hit, in result order.
  int[] expectedSort = {1, 2, 3, 4, 5};
  int position = 0;
  for (var hit : searchResponse.getHitsList()) {
    // Exactly one sort field is reported per hit, and it holds the int_field value.
    assertEquals(1, hit.getSortedFieldsCount());
    int actualSortValue = hit.getSortedFieldsOrThrow("int_field").getFieldValue(0).getIntValue();
    assertEquals(expectedSort[position], actualSortValue);

    assertEquals(0.0, hit.getScore(), 0);
    assertEquals(6, hit.getFieldsCount());
    position++;
  }
}

@Test
public void testSortDocIdWithSearchAfter() {
QuerySortField querySortField =
Expand Down
5 changes: 3 additions & 2 deletions src/test/resources/search/SortFieldRegisterFields.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
{
"name": "doc_id",
"type": "ATOM",
"storeDocValues": true
"storeDocValues": true,
"group": true
},
{
"name": "int_field",
Expand Down Expand Up @@ -60,4 +61,4 @@
]
}
]
}
}

0 comments on commit c3243df

Please sign in to comment.