Skip to content

Commit

Permalink
bug fixes, unit test fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
Thejas-bhat committed Dec 11, 2024
1 parent dbe105f commit 4b626d0
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 19 deletions.
1 change: 0 additions & 1 deletion index/scorch/snapshot_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ func (is *IndexSnapshot) newIndexSnapshotFieldDict(field string,
if dictStats, ok := dict.(segment.DiskStatsReporter); ok {
atomic.AddUint64(&totalBytesRead, dictStats.BytesRead())
}
fmt.Println("bro what", int64(dict.Cardinality()))
atomic.AddInt64(&fieldCardinality, int64(dict.Cardinality()))

Check failure on line 160 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 160 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 160 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 160 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 160 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, ubuntu-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)

Check failure on line 160 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

dict.Cardinality undefined (type segment.TermDictionary has no field or method Cardinality)
if randomLookup {
results <- &asynchSegmentResult{dict: dict}
Expand Down
3 changes: 0 additions & 3 deletions pre_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

package bleve

import "fmt"

// A preSearchResultProcessor processes the data in
// the preSearch result from multiple
// indexes in an alias and merges them together to
Expand Down Expand Up @@ -62,7 +60,6 @@ func newBM25PreSearchResultProcessor() *bm25PreSearchResultProcessor {
// TODO How will this work for queries other than term queries?
func (b *bm25PreSearchResultProcessor) add(sr *SearchResult, indexName string) {
b.docCount += (sr.docCount)
fmt.Println("docCount: ", b.docCount)
for field, cardinality := range sr.fieldCardinality {
b.fieldCardinality[field] += cardinality
}
Expand Down
4 changes: 2 additions & 2 deletions search/scorer/scorer_term_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func TestTermScorer(t *testing.T) {
var queryTerm = []byte("beer")
var queryField = "desc"
var queryBoost = 1.0
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, search.SearcherOptions{Explain: true})
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, 0, search.SearcherOptions{Explain: true})
idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))

tests := []struct {
Expand Down Expand Up @@ -175,7 +175,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) {
var queryTerm = []byte("beer")
var queryField = "desc"
var queryBoost = 3.0
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, search.SearcherOptions{Explain: true})
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, 0, search.SearcherOptions{Explain: true})
idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))

scorer.SetQueryNorm(2.0)
Expand Down
44 changes: 31 additions & 13 deletions search/searcher/search_term.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,26 @@ func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader,
return newTermSearcherFromReader(ctx, indexReader, reader, term, field, boost, options)
}

func tfTDFScoreMetrics(indexReader index.IndexReader) (uint64, int, error) {
func tfTDFScoreMetrics(indexReader index.IndexReader) (uint64, float64, error) {
// default tf-idf stats
count, err := indexReader.DocCount()
if err != nil {
return 0, 0, err
}
fieldCardinality := 0
return count, fieldCardinality, nil

// fmt.Println("----------tf-idf stats--------")
// fmt.Println("docCount: ", count)
// fmt.Println("fieldCardinality: ", fieldCardinality)

if count == 0 && fieldCardinality == 0 {
return 0, 0, nil
}
return count, float64(fieldCardinality / int(count)), nil
}

func bm25ScoreMetrics(ctx context.Context, field string,
indexReader index.IndexReader) (uint64, int, error) {
indexReader index.IndexReader) (uint64, float64, error) {
var count uint64
var fieldCardinality int
var err error
Expand All @@ -93,47 +101,57 @@ func bm25ScoreMetrics(ctx context.Context, field string,
}
}

fmt.Println("----------bm25 stats--------")
fmt.Println("docCount: ", count)
fmt.Println("fieldCardinality: ", fieldCardinality)
fmt.Println("avgDocLength: ", fieldCardinality/int(count))
// fmt.Println("----------bm25 stats--------")
// fmt.Println("docCount: ", count)
// fmt.Println("fieldCardinality: ", fieldCardinality)
// fmt.Println("avgDocLength: ", fieldCardinality/int(count))

return count, fieldCardinality, nil
if count == 0 && fieldCardinality == 0 {
return 0, 0, nil
}
return count, float64(fieldCardinality / int(count)), nil
}

func newTermSearcherFromReader(ctx context.Context, indexReader index.IndexReader, reader index.TermFieldReader,
term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
var count uint64
var fieldCardinality int
var avgDocLength float64
var err error
if ctx != nil {
if similaritModelCallback, ok := ctx.Value(search.
GetSimilarityModelCallbackKey).(search.GetSimilarityModelCallbackFn); ok {
similarityModel := similaritModelCallback(field)
if similarityModel == "" || similarityModel == index.BM25Similarity {

Check failure on line 124 in search/searcher/search_term.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: index.BM25Similarity

Check failure on line 124 in search/searcher/search_term.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

undefined: index.BM25Similarity

Check failure on line 124 in search/searcher/search_term.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: index.BM25Similarity

Check failure on line 124 in search/searcher/search_term.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

undefined: index.BM25Similarity

Check failure on line 124 in search/searcher/search_term.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, ubuntu-latest)

undefined: index.BM25Similarity

Check failure on line 124 in search/searcher/search_term.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

undefined: index.BM25Similarity
// in case of bm25 need to fetch the multipliers as well (perhaps via context's presearch data)
count, fieldCardinality, err = bm25ScoreMetrics(ctx, field, indexReader)
count, avgDocLength, err = bm25ScoreMetrics(ctx, field, indexReader)
if err != nil {
_ = reader.Close()
return nil, err
}
} else {
count, fieldCardinality, err = tfTDFScoreMetrics(indexReader)
count, avgDocLength, err = tfTDFScoreMetrics(indexReader)
if err != nil {
_ = reader.Close()
return nil, err
}
}
} else {
// default tf-idf stats
count, fieldCardinality, err = tfTDFScoreMetrics(indexReader)
count, avgDocLength, err = tfTDFScoreMetrics(indexReader)
if err != nil {
_ = reader.Close()
return nil, err
}
}
} else {
// default tf-idf stats
count, avgDocLength, err = tfTDFScoreMetrics(indexReader)
if err != nil {
_ = reader.Close()
return nil, err
}
}
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), float64(fieldCardinality/int(count)), options)
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), avgDocLength, options)
return &TermSearcher{
indexReader: indexReader,
reader: reader,
Expand Down

0 comments on commit 4b626d0

Please sign in to comment.