Skip to content

Commit

Permalink
hacky start
Browse files Browse the repository at this point in the history
  • Loading branch information
metonymic-smokey authored and Thejas-bhat committed Dec 6, 2024
1 parent bf084bd commit dd4589e
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 8 deletions.
43 changes: 36 additions & 7 deletions index_alias_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
// in another alias, so we need to do a preSearch search
// and NOT a real search
flags := &preSearchFlags{
knn: requestHasKNN(req),
knn: requestHasKNN(req),
bm25: true, // TODO Just force setting it to true to test
}
return preSearchDataSearch(ctx, req, flags, i.indexes...)
}
Expand Down Expand Up @@ -532,27 +533,39 @@ type asyncSearchResult struct {

// preSearchFlags records why a preSearch phase is required for a request.
// Each field names one independent reason; more than one may be set.
type preSearchFlags struct {
	// knn is set when the request has a KNN component.
	knn bool
	// bm25 is set when BM25 scoring needs statistics (currently the total
	// document count) gathered during preSearch.
	bm25 bool
}

// preSearchRequired checks if preSearch is required and returns a boolean flag
// It only allocates the preSearchFlags struct if necessary
func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFlags {
// Check for KNN query
knn := requestHasKNN(req)
if knn {
var synonyms, bm25 bool
if !isMatchNoneQuery(req.Query) {
// todo fix this cuRRENTLY ALL INDEX mappings are BM25 mappings, need to fix
// this is just a placeholder.
if _, ok := m.(mapping.BM25Mapping); ok {
bm25 = true
}
}
if knn || bm25 {
return &preSearchFlags{
knn: knn,
knn: knn,
bm25: bm25,
}
}
return nil
}

func preSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) {
var dummyQuery = req.Query
// create a dummy request with a match none query
// since we only care about the preSearchData in PreSearch
dummyQuery = query.NewMatchNoneQuery()
if !flags.bm25 {
// create a dummy request with a match none query
// since we only care about the preSearchData in PreSearch
dummyQuery = query.NewMatchNoneQuery()
}
dummyRequest := &SearchRequest{
Query: dummyQuery,
}
Expand Down Expand Up @@ -618,6 +631,13 @@ func requestSatisfiedByPreSearch(req *SearchRequest, flags *preSearchFlags) bool
return false
}

// constructBM25PreSearchData stores the total document count carried by sr
// under search.BM25PreSearchDataKey in the per-index entry of rv, for every
// index in indexes, and returns rv.
//
// A missing or nil per-index entry is created on demand, and a nil rv is
// replaced by a fresh map, so the function never assigns into a nil map.
func constructBM25PreSearchData(rv map[string]map[string]interface{}, sr *SearchResult, indexes []Index) map[string]map[string]interface{} {
	if rv == nil {
		rv = make(map[string]map[string]interface{}, len(indexes))
	}
	for _, index := range indexes {
		name := index.Name()
		perIndex := rv[name]
		if perIndex == nil {
			// Assigning into a nil inner map would panic; create it first.
			perIndex = make(map[string]interface{})
			rv[name] = perIndex
		}
		perIndex[search.BM25PreSearchDataKey] = sr.totalDocCount
	}
	return rv
}

func constructPreSearchData(req *SearchRequest, flags *preSearchFlags,
preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) {
mergedOut := make(map[string]map[string]interface{}, len(indexes))
Expand All @@ -631,6 +651,9 @@ func constructPreSearchData(req *SearchRequest, flags *preSearchFlags,
return nil, err
}
}
if flags.bm25 {
mergedOut = constructBM25PreSearchData(mergedOut, preSearchResult, indexes)
}
return mergedOut, nil
}

Expand All @@ -655,6 +678,12 @@ func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]
rv[index.Name()][search.KnnPreSearchDataKey] = segregatedKnnHits[index.Name()]
}
}
// TODO Extend to more stats
if totalDocCount, ok := req.PreSearchData[search.BM25PreSearchDataKey].(uint64); ok {
for _, index := range indexes {
rv[index.Name()][search.BM25PreSearchDataKey] = totalDocCount
}
}
return rv, nil
}

Expand Down
25 changes: 24 additions & 1 deletion index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,12 +449,23 @@ func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader in
}
}

var count uint64
if !isMatchNoneQuery(req.Query) {
if _, ok := i.m.(mapping.BM25Mapping); ok {
count, err = reader.DocCount()
if err != nil {
return nil, err
}
}
}

return &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
},
Hits: knnHits,
Hits: knnHits,
totalDocCount: count,
}, nil
}

Expand Down Expand Up @@ -505,6 +516,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}

var knnHits []*search.DocumentMatch
var bm25TotalDocs uint64
var ok bool
var skipKnnCollector bool
if req.PreSearchData != nil {
Expand All @@ -518,6 +530,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
skipKnnCollector = true
case search.BM25PreSearchDataKey:
if v != nil {
bm25TotalDocs, ok = v.(uint64)
if !ok {
return nil, fmt.Errorf("bm25 preSearchData must be of type uint64")
}
}
}
}
}
Expand All @@ -530,6 +549,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr

setKnnHitsInCollector(knnHits, req, coll)

if bm25TotalDocs > 0 {
ctx = context.WithValue(ctx, search.BM25MapKey, bm25TotalDocs)
}

// This callback and variable handles the tracking of bytes read
// 1. as part of creation of tfr and its Next() calls which is
// accounted by invoking this callback when the TFR is closed.
Expand Down
3 changes: 3 additions & 0 deletions mapping/mapping.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,6 @@ type IndexMapping interface {

FieldMappingForPath(path string) FieldMapping
}
// BM25Mapping is the interface that index mappings are type-asserted to
// when deciding whether BM25 preSearch statistics should be gathered.
// It embeds IndexMapping and declares no methods of its own.
//
// NOTE(review): because no methods are added, every non-nil IndexMapping
// satisfies this interface, so the assertion cannot yet tell mappings
// apart — presumably a placeholder until BM25-specific methods exist.
type BM25Mapping interface {
IndexMapping
}
23 changes: 23 additions & 0 deletions pre_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,24 @@ func (k *knnPreSearchResultProcessor) finalize(sr *SearchResult) {
}
}

// -----------------------------------------------------------------------------
// bm25PreSearchResultProcessor accumulates BM25-specific statistics from
// the preSearch results handed to its add method.
type bm25PreSearchResultProcessor struct {
// docCount is the running sum of totalDocCount over every result added.
docCount uint64 // bm25 specific stats
}

// newBM25PreSearchResultProcessor allocates a processor whose statistics
// all start at zero.
func newBM25PreSearchResultProcessor() *bm25PreSearchResultProcessor {
	return new(bm25PreSearchResultProcessor)
}

// add folds the total document count reported by sr into the processor's
// running sum. indexName is accepted but not consulted.
//
// TODO: How will this work for queries other than term queries?
func (b *bm25PreSearchResultProcessor) add(sr *SearchResult, indexName string) {
	b.docCount = b.docCount + sr.totalDocCount
}

// finalize publishes the aggregated BM25 statistics onto sr by setting its
// totalDocCount to the sum collected through add. Without this step the
// accumulated count is never read and sr keeps whatever count it already
// carried. A nil sr is ignored.
func (b *bm25PreSearchResultProcessor) finalize(sr *SearchResult) {
	if sr == nil {
		return
	}
	sr.totalDocCount = b.docCount
}

// -----------------------------------------------------------------------------
// Master struct that can hold any number of presearch result processors
type compositePreSearchResultProcessor struct {
Expand Down Expand Up @@ -73,6 +91,11 @@ func createPreSearchResultProcessor(req *SearchRequest, flags *preSearchFlags) p
processors = append(processors, knnProcessor)
}
}
if flags.bm25 {
if bm25Processtor := newBM25PreSearchResultProcessor(); bm25Processtor != nil {
processors = append(processors, bm25Processtor)
}
}
// Return based on the number of processors, optimizing for the common case of 1 processor
// If there are no processors, return nil
switch len(processors) {
Expand Down
3 changes: 3 additions & 0 deletions search.go
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,9 @@ type SearchResult struct {
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`
// The following fields are applicable to BM25 preSearch
// todo add more fields beyond docCount
totalDocCount uint64
}

func (sr *SearchResult) Size() int {
Expand Down
1 change: 1 addition & 0 deletions search/searcher/search_term.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, te

func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermFieldReader,
term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
// TODO Instead of passing count from reader here, do it using the presearch phase stats.
count, err := indexReader.DocCount()
if err != nil {
_ = reader.Close()
Expand Down
3 changes: 3 additions & 0 deletions search/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ const MinGeoBufPoolSize = 24
type GeoBufferPoolCallbackFunc func() *s2.GeoBufferPool

// KnnPreSearchDataKey is the key under which KNN hits are stored in a
// request's preSearch data.
const KnnPreSearchDataKey = "_knn_pre_search_data_key"

// BM25PreSearchDataKey is the key under which the BM25 total document
// count (a uint64) is stored in a request's preSearch data.
const BM25PreSearchDataKey = "_bm25_pre_search_data_key"

const PreSearchKey = "_presearch_key"

Expand All @@ -144,5 +145,7 @@ type ScoreExplCorrectionCallbackFunc func(queryMatch *DocumentMatch, knnMatch *D
type SearcherStartCallbackFn func(size uint64) error
type SearcherEndCallbackFn func(size uint64) error

// BM25MapKey is the context key under which the BM25 total document count
// (a uint64) obtained from preSearch data is attached to the search context.
const BM25MapKey = "_bm25_map_key"

const SearcherStartCallbackKey = "_searcher_start_callback_key"
const SearcherEndCallbackKey = "_searcher_end_callback_key"

0 comments on commit dd4589e

Please sign in to comment.