Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add search option to apply term boosting #274

Merged
merged 1 commit into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

`MiniSearch` follows [semantic versioning](https://semver.org/spec/v2.0.0.html).

## unreleased

- Add `boostTerm` search option to apply a custom boosting factor to specific
terms in the query

## v7.0.2

- [fix] Fix regression on tokenizer producing blank terms when multiple
Expand Down
16 changes: 16 additions & 0 deletions src/MiniSearch.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,22 @@ describe('MiniSearch', () => {
expect(results[0].score).toBeCloseTo(resultsWithoutBoost[0].score * boostFactor)
})

it('boosts terms by calling boostTerm with normalized query term, term index in the query, and array of all query terms', () => {
  // Multiplier to apply to each query term, keyed by the lowercased form that
  // the callback is expected to receive.
  const factorByTerm = { commedia: 1.5, nova: 1.1 }
  const normalizedTerms = ['commedia', 'nova']
  const boostTerm = jest.fn((term) => factorByTerm[term])
  const searchText = 'Commedia nova'

  // The baseline search runs first and without the callback, so every
  // recorded call on the mock comes from the boosted search alone.
  const baseline = ms.search(searchText)
  const boosted = ms.search(searchText, { boostTerm })

  // The callback must have been invoked once per term with
  // (term, position in the query, all query terms).
  normalizedTerms.forEach((term, position) => {
    expect(boostTerm).toHaveBeenCalledWith(term, position, normalizedTerms)
  })

  // Each of the top two scores must be scaled by exactly one term's factor.
  expect(boosted[0].score).toBeCloseTo(baseline[0].score * factorByTerm.commedia)
  expect(boosted[1].score).toBeCloseTo(baseline[1].score * factorByTerm.nova)
})

it('skips document if boostDocument returns a falsy value', () => {
const query = 'vita'
const boostDocument = jest.fn((id, term) => id === 3 ? null : 1)
Expand Down
30 changes: 24 additions & 6 deletions src/MiniSearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ export type SearchOptions = {
*/
boost?: { [fieldName: string]: number },

/**
* Function to calculate a boost factor for each term.
*
* This function, if provided, is called for each query term (as split by
* `tokenize` and processed by `processTerm`). The arguments passed to the
* function are the query term, the positional index of the term in the query,
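* and the array of all query terms. It must return a numeric boost factor for
* every term, because the returned value is multiplied directly into the
* score of each match for that term, including prefix and fuzzy matches
* derived from it. A factor lower than 1 reduces the importance of the term,
* a factor greater than 1 increases it. A factor of exactly 1 is neutral, and
* does not affect the term's importance.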
*/
boostTerm?: (term: string, i: number, terms: string[]) => number,

/**
* Relative weights to assign to prefix search results and fuzzy search
* results. Exact matches are assigned a weight of 1.
Expand Down Expand Up @@ -480,7 +493,8 @@ export type AutoVacuumOptions = VacuumOptions & VacuumConditions
/**
 * Per-term search specification built from the search options for one query
 * term.
 *
 * `prefix` is the resolved prefix-search flag for the term, `fuzzy` is the
 * fuzzy-search setting for the term, and `term` is the query term itself.
 * `termBoost` is the factor returned by the `boostTerm` search option for
 * this term, or 1 when no `boostTerm` function is given; it is multiplied
 * into the weighted score of every match produced for the term.
 */
type QuerySpec = {
prefix: boolean,
fuzzy: number | boolean,
term: string
term: string,
termBoost: number
}

type DocumentTermFreqs = Map<number, number>
Expand Down Expand Up @@ -1685,7 +1699,7 @@ export default class MiniSearch<T = any> {
const { fuzzy: fuzzyWeight, prefix: prefixWeight } = { ...defaultSearchOptions.weights, ...weights }

const data = this._index.get(query.term)
const results = this.termResults(query.term, query.term, 1, data, boosts, boostDocument, bm25params)
const results = this.termResults(query.term, query.term, 1, query.termBoost, data, boosts, boostDocument, bm25params)

let prefixMatches
let fuzzyMatches
Expand Down Expand Up @@ -1715,7 +1729,7 @@ export default class MiniSearch<T = any> {
// account for the fact that prefix matches stay more relevant than
// fuzzy matches for longer distances.
const weight = prefixWeight * term.length / (term.length + 0.3 * distance)
this.termResults(query.term, term, weight, data, boosts, boostDocument, bm25params, results)
this.termResults(query.term, term, weight, query.termBoost, data, boosts, boostDocument, bm25params, results)
}
}

Expand All @@ -1727,7 +1741,7 @@ export default class MiniSearch<T = any> {
// Weight gradually approaches 0 as distance goes to infinity, with the
// weight for the hypothetical distance 0 being equal to fuzzyWeight.
const weight = fuzzyWeight * term.length / (term.length + distance)
this.termResults(query.term, term, weight, data, boosts, boostDocument, bm25params, results)
this.termResults(query.term, term, weight, query.termBoost, data, boosts, boostDocument, bm25params, results)
}
}

Expand Down Expand Up @@ -1826,6 +1840,7 @@ export default class MiniSearch<T = any> {
sourceTerm: string,
derivedTerm: string,
termWeight: number,
termBoost: number,
fieldTermData: FieldTermData | undefined,
fieldBoosts: { [field: string]: number },
boostDocumentFn: ((id: any, term: string, storedFields?: Record<string, unknown>) => number) | undefined,
Expand Down Expand Up @@ -1864,7 +1879,7 @@ export default class MiniSearch<T = any> {
// present. This is currently not supported, and may require further
// analysis to see if it is a valid use case.
const rawScore = calcBM25Score(termFreq, matchingFields, this._documentCount, fieldLength, avgFieldLength, bm25params)
const weightedScore = termWeight * fieldBoost * docBoost * rawScore
const weightedScore = termWeight * termBoost * fieldBoost * docBoost * rawScore

const result = results.get(docId)
if (result) {
Expand Down Expand Up @@ -2118,7 +2133,10 @@ const termToQuerySpec = (options: SearchOptions) => (term: string, i: number, te
const prefix = (typeof options.prefix === 'function')
? options.prefix(term, i, terms)
: (options.prefix === true)
return { term, fuzzy, prefix }
const termBoost = (typeof options.boostTerm === 'function')
? options.boostTerm(term, i, terms)
: 1
return { term, fuzzy, prefix, termBoost }
}

const defaultOptions = {
Expand Down