Skip to content

Commit

Permalink
Addressed more SonarQube problems
Browse files Browse the repository at this point in the history
  • Loading branch information
Flixtastic committed Jan 11, 2025
1 parent 1e0fc14 commit 9f9738c
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 39 deletions.
86 changes: 54 additions & 32 deletions src/index/IndexImpl.Text.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

// _____________________________________________________________________________
cppcoro::generator<WordsFileLine> IndexImpl::wordsInTextRecords(
const std::string& contextFile, bool addWordsFromLiterals) const {
std::string contextFile, bool addWordsFromLiterals) const {
auto localeManager = textVocab_.getLocaleManager();
// ROUND 1: If context file aka wordsfile is not empty, read words from there.
// Remember the last context id for the (optional) second round.
Expand Down Expand Up @@ -62,6 +62,56 @@ cppcoro::generator<WordsFileLine> IndexImpl::wordsInTextRecords(
}
}

// _____________________________________________________________________________
void IndexImpl::processEntityCaseDuringInvertedListProcessing(
const WordsFileLine& line,
ad_utility::HashMap<Id, Score>& entitiesInContext, size_t& nofLiterals,
size_t& entityNotFoundErrorMsgCount) const {
VocabIndex eid;
// TODO<joka921> Currently only IRIs and strings from the vocabulary can
// be tagged entities in the text index (no doubles, ints, etc).
if (getVocab().getId(line.word_, &eid)) {
// Note that `entitiesInContext` is a HashMap, so the `Id`s don't have
// to be contiguous.
entitiesInContext[Id::makeFromVocabIndex(eid)] += line.score_;
if (line.isLiteralEntity_) {
++nofLiterals;
}
} else {
logEntityNotFound(line.word_, entityNotFoundErrorMsgCount);
}
}

// _____________________________________________________________________________
// Handle a single (non-entity) word line while building the inverted lists:
// look up `line.word_` in `textVocab_` and add `line.score_` to the entry of
// the resulting `WordIndex` in `wordsInContext`. If the word is not contained
// in `textVocab_`, an error is logged and `AD_FAIL()` is called.
void IndexImpl::processWordCaseDuringInvertedListProcessing(
const WordsFileLine& line,
ad_utility::HashMap<WordIndex, Score>& wordsInContext) const {
// TODO<joka921> Let the `textVocab_` return a `WordIndex` directly.
WordVocabIndex vid;
bool ret = textVocab_.getId(line.word_, &vid);
// NOTE(review): `vid.get()` is evaluated before `ret` is checked. If `getId`
// leaves `vid` untouched on failure, this reads a value that was never set;
// consider moving this line below the check -- TODO confirm.
WordIndex wid = vid.get();
if (!ret) {
// A missing word is treated as fatal (the log message says "Terminating");
// presumably `AD_FAIL()` does not return -- TODO confirm.
LOG(ERROR) << "ERROR: word \"" << line.word_ << "\" "
<< "not found in textVocab. Terminating\n";
AD_FAIL();
}

Check warning on line 97 in src/index/IndexImpl.Text.cpp

View check run for this annotation

Codecov / codecov/patch

src/index/IndexImpl.Text.cpp#L94-L97

Added lines #L94 - L97 were not covered by tests
// Accumulate the score of this word for the current context.
wordsInContext[wid] += line.score_;
}

// _____________________________________________________________________________
// Report that the entity `word` from the text input was not found in the
// knowledge base, and count the occurrence in `entityNotFoundErrorMsgCount`.
// The counter is incremented on every call, but a warning is only logged
// while the counter is still below 20 on entry. The call that raises the
// counter to 20 additionally logs a notice that further warnings are
// suppressed.
void IndexImpl::logEntityNotFound(const string& word,
size_t& entityNotFoundErrorMsgCount) const {
if (entityNotFoundErrorMsgCount < 20) {
LOG(WARN) << "Entity from text not in KB: " << word << '\n';
// The pre-increment both counts this occurrence and detects the moment at
// which the limit of 20 logged warnings is reached.
if (++entityNotFoundErrorMsgCount == 20) {
LOG(WARN) << "There are more entities not in the KB..."
<< " suppressing further warnings...\n";
}

Check warning on line 109 in src/index/IndexImpl.Text.cpp

View check run for this annotation

Codecov / codecov/patch

src/index/IndexImpl.Text.cpp#L107-L109

Added lines #L107 - L109 were not covered by tests
} else {
// Limit already reached: keep counting silently.
entityNotFoundErrorMsgCount++;
}

Check warning on line 112 in src/index/IndexImpl.Text.cpp

View check run for this annotation

Codecov / codecov/patch

src/index/IndexImpl.Text.cpp#L111-L112

Added lines #L111 - L112 were not covered by tests
}

// _____________________________________________________________________________
void IndexImpl::addTextFromContextFile(const string& contextFile,
bool addWordsFromLiterals) {
Expand Down Expand Up @@ -234,39 +284,11 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile,
}
if (line.isEntity_) {
++nofEntityPostings;
// TODO<joka921> Currently only IRIs and strings from the vocabulary can
// be tagged entities in the text index (no doubles, ints, etc).
VocabIndex eid;
if (getVocab().getId(line.word_, &eid)) {
// Note that `entitiesInContext` is a HashMap, so the `Id`s don't have
// to be contiguous.
entitiesInContext[Id::makeFromVocabIndex(eid)] += line.score_;
if (line.isLiteralEntity_) {
++nofLiterals;
}
} else {
if (entityNotFoundErrorMsgCount < 20) {
LOG(WARN) << "Entity from text not in KB: " << line.word_ << '\n';
if (++entityNotFoundErrorMsgCount == 20) {
LOG(WARN) << "There are more entities not in the KB..."
<< " suppressing further warnings...\n";
}
} else {
entityNotFoundErrorMsgCount++;
}
}
processEntityCaseDuringInvertedListProcessing(
line, entitiesInContext, nofLiterals, entityNotFoundErrorMsgCount);
} else {
++nofWordPostings;
// TODO<joka921> Let the `textVocab_` return a `WordIndex` directly.
WordVocabIndex vid;
bool ret = textVocab_.getId(line.word_, &vid);
WordIndex wid = vid.get();
if (!ret) {
LOG(ERROR) << "ERROR: word \"" << line.word_ << "\" "
<< "not found in textVocab. Terminating\n";
AD_FAIL();
}
wordsInContext[wid] += line.score_;
processWordCaseDuringInvertedListProcessing(line, wordsInContext);
}
}
if (entityNotFoundErrorMsgCount > 0) {
Expand Down
14 changes: 13 additions & 1 deletion src/index/IndexImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,19 @@ class IndexImpl {
// testing phase, once it works, it should be easy to include the IRIs and
// literals from the external vocabulary as well).
cppcoro::generator<WordsFileLine> wordsInTextRecords(
const std::string& contextFile, bool addWordsFromLiterals) const;
std::string contextFile, bool addWordsFromLiterals) const;

// Handle a single entity line of the words file while building the inverted
// lists. If `line.word_` is found in the vocabulary, `line.score_` is added
// to the entry of the corresponding `Id` in `entitiesInContext`, and
// `nofLiterals` is incremented if the line is marked as a literal entity.
// Otherwise the missing entity is reported via `logEntityNotFound`, which
// updates `entityNotFoundErrorMsgCount`.
void processEntityCaseDuringInvertedListProcessing(
    const WordsFileLine& line,
    ad_utility::HashMap<Id, Score>& entitiesInContext, size_t& nofLiterals,
    size_t& entityNotFoundErrorMsgCount) const;

// Handle a single (non-entity) word line of the words file while building the
// inverted lists: look up `line.word_` in the text vocabulary and add
// `line.score_` to the corresponding entry of `wordsInContext`. A word that
// is missing from the text vocabulary is logged as an error and treated as a
// failure (`AD_FAIL()`).
void processWordCaseDuringInvertedListProcessing(
const WordsFileLine& line,
ad_utility::HashMap<WordIndex, Score>& wordsInContext) const;

// Count (in `entityNotFoundErrorMsgCount`) and report an entity `word` from
// the text input that is not part of the knowledge base. Only the first 20
// occurrences are logged as warnings; later ones are only counted.
void logEntityNotFound(const string& word,
size_t& entityNotFoundErrorMsgCount) const;

size_t processWordsForVocabulary(const string& contextFile,
bool addWordsFromLiterals);
Expand Down
6 changes: 3 additions & 3 deletions src/parser/WordsAndDocsFileParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ ad_utility::InputRangeFromGet<WordsFileLine>::Storage WordsFileParser::get() {
string l;
if (!std::getline(getInputStream(), l)) {
return std::nullopt;
};
}
std::string_view lineView(l);
size_t i = lineView.find('\t');
assert(i != string::npos);
Expand All @@ -48,11 +48,11 @@ ad_utility::InputRangeFromGet<WordsFileLine>::Storage WordsFileParser::get() {

// _____________________________________________________________________________
ad_utility::InputRangeFromGet<DocsFileLine>::Storage DocsFileParser::get() {
DocsFileLine line;
string l;
if (!std::getline(getInputStream(), l)) {
return std::nullopt;
};
}
DocsFileLine line;
size_t i = l.find('\t');
assert(i != string::npos);
line.docId_ = DocumentIndex::make(atol(l.substr(0, i).c_str()));
Expand Down
4 changes: 1 addition & 3 deletions src/parser/WordsAndDocsFileParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@ struct WordsFileLine {
* TextRecordIndex as type of one column. Those get
* mapped to the next bigger or equal docId which is
* then used to extract the text from the docsDB.
* TODO: check if this behaviour is consistently
* implemented.
* - string docContent_: The whole text given after the first tab of a line of
* docsfile.
*/
Expand Down Expand Up @@ -145,7 +143,7 @@ class WordsAndDocsFileParser {

protected:
std::ifstream& getInputStream() { return in_; }
const LocaleManager& getLocaleManager() { return localeManager_; }
const LocaleManager& getLocaleManager() const { return localeManager_; }

private:
std::ifstream in_;
Expand Down

0 comments on commit 9f9738c

Please sign in to comment.