Skip to content

Commit

Permalink
update score explanation, code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
Thejas-bhat committed Jan 13, 2025
1 parent d478f4f commit eaca63a
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 14 deletions.
25 changes: 18 additions & 7 deletions search/scorer/scorer_term.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ func (s *TermQueryScorer) computeIDF(avgDocLength float64, docTotal, docTerm uin
return rv
}

// queryTerm - the specific term being scored by this scorer object
// queryField - the field in which the term is being searched
// queryBoost - the boost value for the query term
// docTotal - total number of documents in the index
// docTerm - number of documents containing the term
// avgDocLength - average document length in the index
// options - search options, such as whether to explain the scoring or include the locations of the term
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal,
docTerm uint64, avgDocLength float64, options search.SearcherOptions) *TermQueryScorer {

Expand Down Expand Up @@ -132,18 +139,21 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
}
}

func (s *TermQueryScorer) docScore(tf, norm float64) float64 {
// tf-idf scoring by default
score := tf * norm * s.idf
func (s *TermQueryScorer) docScore(tf, norm float64) (score float64, model string) {
if s.avgDocLength > 0 {
// bm25 scoring
// using the posting's norm value to recompute the field length for the doc num
fieldLength := 1 / (norm * norm)

score = s.idf * (tf * search.BM25_k1) /
(tf + search.BM25_k1*(1-search.BM25_b+(search.BM25_b*fieldLength/s.avgDocLength)))
model = index.BM25Scoring
} else {
// tf-idf scoring by default
score = tf * norm * s.idf
model = index.DefaultScoringModel
}
return score
return score, model
}

func (s *TermQueryScorer) scoreExplanation(tf float64, termMatch *index.TermFieldDoc) []*search.Explanation {
Expand Down Expand Up @@ -198,12 +208,13 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
tf = math.Sqrt(float64(termMatch.Freq))
}

score := s.docScore(tf, termMatch.Norm)
score, scoringModel := s.docScore(tf, termMatch.Norm)
if s.options.Explain {
childrenExplanations := s.scoreExplanation(tf, termMatch)
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), as per %s model, "+
"product of:", s.queryField, s.queryTerm, termMatch.ID, scoringModel),
Children: childrenExplanations,
}
}
Expand Down
8 changes: 4 additions & 4 deletions search/scorer/scorer_term_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func TestTermScorer(t *testing.T) {
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
Message: "fieldWeight(desc:beer in one), as per tfidf model, product of:",
Children: []*search.Explanation{
{
Value: 1,
Expand Down Expand Up @@ -100,7 +100,7 @@ func TestTermScorer(t *testing.T) {
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
Message: "fieldWeight(desc:beer in one), as per tfidf model, product of:",
Children: []*search.Explanation{
{
Value: 1,
Expand Down Expand Up @@ -131,7 +131,7 @@ func TestTermScorer(t *testing.T) {
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(65) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
Message: "fieldWeight(desc:beer in one), as per tfidf model, product of:",
Children: []*search.Explanation{
{
Value: math.Sqrt(65),
Expand Down Expand Up @@ -224,7 +224,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) {
},
{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
Message: "fieldWeight(desc:beer in one), as per tfidf model, product of:",
Children: []*search.Explanation{
{
Value: 1,
Expand Down
8 changes: 5 additions & 3 deletions search/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,12 @@ func (f FieldTermSynonymMap) MergeWith(fts FieldTermSynonymMap) {
}
}

// BM25 specific multipliers which affect the scoring of a document.
// BM25 specific multipliers which control the scoring of a document.
//
// BM25_b - how much does a doc's field length affect the score
// BM25_k1 - how much can the term frequency affect the score
// BM25_b - controls the extent to which a doc's field length normalizes the term frequency part of the score
// BM25_k1 - controls the saturation of the score due to term frequency
// The default values are as per Elasticsearch's implementation:
// - https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-similarity.html#bm25
var (
	// BM25_k1 is the BM25 term-frequency saturation multiplier.
	BM25_k1 float64 = 1.2
	// BM25_b is the BM25 field-length normalization multiplier.
	BM25_b float64 = 0.75
)

Expand Down

0 comments on commit eaca63a

Please sign in to comment.