Skip to content

Commit

Permalink
score explanation
Browse files Browse the repository at this point in the history
  • Loading branch information
Thejas-bhat committed Jan 6, 2025
1 parent 52e318d commit 36db386
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 17 deletions.
2 changes: 1 addition & 1 deletion index_alias_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ func isBM25Enabled(req *SearchRequest, m mapping.IndexMapping) (bool, query.Fiel
// scoring. Otherwise, we just skip the presearch
for field := range fs {
f := m.FieldMappingForPath(field)
if f.Similarity == "" || f.Similarity == index.BM25Similarity {
if f.Similarity == index.BM25Similarity {
rv = true
break
}
Expand Down
1 change: 1 addition & 0 deletions mapping/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ type IndexMappingImpl struct {
DefaultAnalyzer string `json:"default_analyzer"`
DefaultDateTimeParser string `json:"default_datetime_parser"`
DefaultSynonymSource string `json:"default_synonym_source,omitempty"`
DefaultSimilarity string `json:"default_similarity,omitempty"`
DefaultField string `json:"default_field"`
StoreDynamic bool `json:"store_dynamic"`
IndexDynamic bool `json:"index_dynamic"`
Expand Down
61 changes: 46 additions & 15 deletions search/scorer/scorer_term.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
}
}

// k1 and b are the BM25 free parameters: k1 caps how much a term's
// frequency can boost the score (term-frequency saturation), and b
// controls how strongly the document's field length normalizes the score.
var k1 float64 = 1.2
var b float64 = 0.75

func (s *TermQueryScorer) docScore(tf, norm float64) float64 {
// tf-idf scoring by default
score := tf * norm * s.idf
Expand All @@ -140,16 +145,52 @@ func (s *TermQueryScorer) docScore(tf, norm float64) float64 {
// using the posting's norm value to recompute the field length for the doc num
fieldLength := 1 / (norm * norm)

// multiplies deciding how much does a doc length affect the score and also
// how much can the term frequency affect the score
var k1 float64 = 1.2
var b float64 = 0.75
score = s.idf * (tf * k1) /
(tf + k1*(1-b+(b*fieldLength/s.avgDocLength)))
}
return score
}

// scoreExplanation builds the three child explanations (tf, norm/saturation,
// idf) for a term match. When avgDocLength > 0 the scorer is in BM25 mode and
// the middle child describes the BM25 saturation and field-length
// normalization terms; otherwise it describes the stored field norm used by
// the default tf-idf scoring.
func (s *TermQueryScorer) scoreExplanation(tf float64, termMatch *index.TermFieldDoc) []*search.Explanation {
	rv := make([]*search.Explanation, 3)
	// The tf and idf children are identical for both scoring models.
	rv[0] = &search.Explanation{
		Value:   tf,
		Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
	}
	rv[2] = s.idfExplanation
	if s.avgDocLength > 0 {
		// BM25: recover the doc's field length from the stored norm
		// (norm is presumably 1/sqrt(fieldLength), matching docScore's
		// fieldLength = 1/(norm*norm) — confirm against the index encoding).
		fieldLength := 1 / (termMatch.Norm * termMatch.Norm)
		fieldNormVal := 1 - b + (b * fieldLength / s.avgDocLength)
		fieldNormalizeExplanation := &search.Explanation{
			Value: fieldNormVal,
			Message: fmt.Sprintf("fieldNorm(field=%s), b=%f, fieldLength=%f, avgFieldLength=%f)",
				s.queryField, b, fieldLength, s.avgDocLength),
		}
		rv[1] = &search.Explanation{
			Value: k1 / (tf + k1*fieldNormVal),
			// Argument order fixed: the placeholders are k1 then tf, so the
			// values must be passed as k1, tf (they were previously swapped).
			Message: fmt.Sprintf("saturation(term:%s), k1=%f/(tf=%f + k1*fieldNorm=%f))",
				termMatch.Term, k1, tf, fieldNormVal),
			Children: []*search.Explanation{fieldNormalizeExplanation},
		}
	} else {
		// Default tf-idf: the stored norm is the field norm for this doc.
		rv[1] = &search.Explanation{
			Value:   termMatch.Norm,
			Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
		}
	}
	return rv
}

func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
rv := ctx.DocumentMatchPool.Get()
// perform any score computations only when needed
Expand All @@ -163,18 +204,8 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
}

score := s.docScore(tf, termMatch.Norm)
// todo: explain stuff properly
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
}
childrenExplanations[2] = s.idfExplanation
childrenExplanations := s.scoreExplanation(tf, termMatch)
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
Expand Down
2 changes: 1 addition & 1 deletion search/searcher/search_term.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func newTermSearcherFromReader(ctx context.Context, indexReader index.IndexReade
if similaritModelCallback, ok := ctx.Value(search.
GetSimilarityModelCallbackKey).(search.GetSimilarityModelCallbackFn); ok {
similarityModel := similaritModelCallback(field)
if similarityModel == "" || similarityModel == index.BM25Similarity {
if similarityModel == index.BM25Similarity {
// in case of bm25 need to fetch the multipliers as well (perhaps via context's presearch data)
count, avgDocLength, err = bm25ScoreMetrics(ctx, field, indexReader)
if err != nil {
Expand Down

0 comments on commit 36db386

Please sign in to comment.