Commit 5d266c5d authored by Dimitri van Heesch

Prevent overly long terms from stopping the search indexer

parent 07d5f3f4
...@@ -30,12 +30,19 @@ ...@@ -30,12 +30,19 @@
// Xapian include // Xapian include
#include <xapian.h> #include <xapian.h>
#define MAX_TERM_LENGTH 245
#if defined(_WIN32) && !defined(__CYGWIN__) #if defined(_WIN32) && !defined(__CYGWIN__)
static char pathSep = '\\'; static char pathSep = '\\';
#else #else
static char pathSep = '/'; static char pathSep = '/';
#endif #endif
/** Adds \a term to document \a doc, forwarding \a wfd to Xapian::Document::add_term().
 *  Terms longer than MAX_TERM_LENGTH are silently skipped, so that an overly
 *  long term cannot stop the search indexer.
 */
static void safeAddTerm(const std::string &term,Xapian::Document &doc,int wfd)
{
  // Too long to index: drop the term instead of handing it to Xapian.
  if (term.length()>MAX_TERM_LENGTH)
  {
    return;
  }
  doc.add_term(term,wfd);
}
/** trims \a whitespace characters from the start and end of string \a str. */ /** trims \a whitespace characters from the start and end of string \a str. */
static std::string trim(const std::string& str, static std::string trim(const std::string& str,
const std::string& whitespace = " \t") const std::string& whitespace = " \t")
...@@ -86,10 +93,10 @@ static void addWords(const std::string &s,Xapian::Document &doc,int wfd) ...@@ -86,10 +93,10 @@ static void addWords(const std::string &s,Xapian::Document &doc,int wfd)
std::string word = *it; std::string word = *it;
std::string lword = word; std::string lword = word;
std::transform(lword.begin(), lword.end(), lword.begin(), ::tolower); std::transform(lword.begin(), lword.end(), lword.begin(), ::tolower);
doc.add_term(word,wfd); safeAddTerm(word,doc,wfd);
if (lword!=word) if (lword!=word)
{ {
doc.add_term(lword,wfd); safeAddTerm(lword,doc,wfd);
} }
} }
} }
...@@ -102,7 +109,7 @@ static void addIdentifiers(const std::string &s,Xapian::Document &doc,int wfd) ...@@ -102,7 +109,7 @@ static void addIdentifiers(const std::string &s,Xapian::Document &doc,int wfd)
QCString qs = s.c_str(); QCString qs = s.c_str();
while ((i=re.match(qs,p,&l))!=-1) while ((i=re.match(qs,p,&l))!=-1)
{ {
doc.add_term(qs.mid(p,i-p).data(),wfd); safeAddTerm(qs.mid(p,i-p).data(),doc,wfd);
p=i+l; p=i+l;
} }
} }
...@@ -201,18 +208,18 @@ class XMLContentHandler : public QXmlDefaultHandler ...@@ -201,18 +208,18 @@ class XMLContentHandler : public QXmlDefaultHandler
m_doc.get_value(TypeField)=="file" || m_doc.get_value(TypeField)=="file" ||
m_doc.get_value(TypeField)=="namespace") // containers get highest prio m_doc.get_value(TypeField)=="namespace") // containers get highest prio
{ {
m_doc.add_term(term,1000); safeAddTerm(term,m_doc,1000);
if (!partTerm.empty()) if (!partTerm.empty())
{ {
m_doc.add_term(partTerm,500); safeAddTerm(partTerm,m_doc,500);
} }
} }
else // members and others get lower prio else // members and others get lower prio
{ {
m_doc.add_term(m_doc.get_value(NameField),100); safeAddTerm(m_doc.get_value(NameField),m_doc,100);
if (!partTerm.empty()) if (!partTerm.empty())
{ {
m_doc.add_term(partTerm,50); safeAddTerm(partTerm,m_doc,50);
} }
} }
m_db.add_document(m_doc); m_db.add_document(m_doc);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment