Commit 6538fdca authored by Dimitri van Heesch

Bug 705910 - Indexing and searching cannot treat non ASCII identifiers

parent e193c540
......@@ -138,31 +138,6 @@ class MemberIndexList : public QList<MemberDef>
uint m_letter;
};
/** @brief Dictionary keyed on a unicode character code whose values are
 *  lists (of type T) that collect T::ElementType items.
 *
 *  T has to offer a constructor taking the letter code, a letter()
 *  accessor and an append() method accepting a T::ElementType pointer.
 */
template<class T>
class LetterToIndexMap : public SIntDict<T>
{
  public:
    /** Creates an empty map and switches on auto-delete in the SIntDict base. */
    LetterToIndexMap()
    {
      SIntDict<T>::setAutoDelete(TRUE);
    }

    /** Compares two stored lists by letter code; the result is the signed
     *  difference between the letter of \a item1 and the letter of \a item2.
     */
    int compareItems(QCollection::Item item1, QCollection::Item item2)
    {
      int firstLetter  = (int)((T *)item1)->letter();
      int secondLetter = (int)((T *)item2)->letter();
      return firstLetter-secondLetter;
    }

    /** Adds \a elem to the list registered under \a letter. When no list
     *  exists yet for that letter, a new one is created and inserted with
     *  inSort() before the element is appended.
     */
    void append(uint letter,typename T::ElementType *elem)
    {
      const int key = (int)letter;
      T *bucket = SIntDict<T>::find(key);
      if (!bucket)
      {
        bucket = new T(letter);
        SIntDict<T>::inSort(key,bucket);
      }
      bucket->append(elem);
    }
};
static LetterToIndexMap<MemberIndexList> g_memberIndexLetterUsed[CMHL_Total];
static LetterToIndexMap<MemberIndexList> g_fileIndexLetterUsed[FMHL_Total];
static LetterToIndexMap<MemberIndexList> g_namespaceIndexLetterUsed[NMHL_Total];
......@@ -1783,7 +1758,7 @@ class PrefixIgnoreClassList : public ClassList
class AlphaIndexTableCell
{
public:
AlphaIndexTableCell(int row,int col,uchar letter,ClassDef *cd) :
AlphaIndexTableCell(int row,int col,uint letter,ClassDef *cd) :
m_letter(letter), m_class(cd), m_row(row), m_col(col)
{ //printf("AlphaIndexTableCell(%d,%d,%c,%s)\n",row,col,letter!=0 ? letter: '-',
// cd!=(ClassDef*)0x8 ? cd->name().data() : "<null>");
......@@ -1914,7 +1889,7 @@ static void writeAlphabeticalClassList(OutputList &ol)
if (cd->isLinkableInProject() && cd->templateMaster()==0)
{
int index = getPrefixIndex(cd->className());
startLetter=toupper(cd->className().at(index))&0xFF;
startLetter=getUtf8Code(cd->className(),index);
// Do some sorting again, since the classes are sorted by name with
// prefix, which should be ignored really.
if (cd->getLanguage()==SrcLangExt_VHDL)
......@@ -1954,7 +1929,7 @@ static void writeAlphabeticalClassList(OutputList &ol)
{
uint l = cl->letter();
// add special header cell
tableRows->append(new AlphaIndexTableCell(row,col,(uchar)l,(ClassDef*)0x8));
tableRows->append(new AlphaIndexTableCell(row,col,l,(ClassDef*)0x8));
row++;
tableRows->append(new AlphaIndexTableCell(row,col,0,(ClassDef*)0x8));
row++;
......@@ -2017,7 +1992,7 @@ static void writeAlphabeticalClassList(OutputList &ol)
ol.writeString("<table border=\"0\" cellspacing=\"0\" cellpadding=\"0\">"
"<tr>"
"<td><div class=\"ah\">&#160;&#160;");
ol.writeString(s);
ol.writeString(QString(QChar(cell->letter())).utf8());
ol.writeString( "&#160;&#160;</div>"
"</td>"
"</tr>"
......
......@@ -5,7 +5,7 @@ function convertToId(search)
{
var c = search.charAt(i);
var cn = c.charCodeAt(0);
if (c.match(/[a-z0-9]/))
if (c.match(/[a-z0-9\u0080-\uFFFF]/))
{
result+=c;
}
......@@ -310,22 +310,20 @@ function SearchBox(name, resultsPath, inFrame, label)
var searchValue = this.DOMSearchField().value.replace(/^ +/, "");
var code = searchValue.toLowerCase().charCodeAt(0);
var hexCode;
if (code<16)
var idxChar = searchValue.substr(0, 1).toLowerCase();
if ( 0xD800 <= code && code <= 0xDBFF && searchValue > 1) // surrogate pair
{
hexCode="0"+code.toString(16);
}
else
{
hexCode=code.toString(16);
idxChar = searchValue.substr(0, 2);
}
var resultsPage;
var resultsPageWithSearch;
var hasResultsPage;
if (indexSectionsWithContent[this.searchIndex].charAt(code) == '1')
var idx = indexSectionsWithContent[this.searchIndex].indexOf(idxChar);
if (idx!=-1)
{
var hexCode=idx.toString(16);
resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html';
resultsPageWithSearch = resultsPage+'?'+escape(searchValue);
hasResultsPage = true;
......
......@@ -358,7 +358,7 @@ function main()
$sorted = run_query($query);
// Now output the HTML stuff...
// End the HTML form
end_form(preg_replace("/[^a-zA-Z0-9\-\_\.]/i", " ", $query ));
end_form(preg_replace("/[^a-zA-Z0-9\-\_\.\x80-\xFF]/i", " ", $query ));
// report results to the user
report_results($sorted);
end_page();
......
......@@ -358,7 +358,7 @@
" $sorted = run_query($query);\n"
" // Now output the HTML stuff...\n"
" // End the HTML form\n"
" end_form(preg_replace(\"/[^a-zA-Z0-9\\-\\_\\.]/i\", \" \", $query ));\n"
" end_form(preg_replace(\"/[^a-zA-Z0-9\\-\\_\\.\\x80-\\xFF]/i\", \" \", $query ));\n"
" // report results to the user\n"
" report_results($sorted);\n"
" end_page();\n"
......
......@@ -5,7 +5,7 @@
" {\n"
" var c = search.charAt(i);\n"
" var cn = c.charCodeAt(0);\n"
" if (c.match(/[a-z0-9]/))\n"
" if (c.match(/[a-z0-9\\u0080-\\uFFFF]/))\n"
" {\n"
" result+=c;\n"
" }\n"
......@@ -310,22 +310,20 @@
" var searchValue = this.DOMSearchField().value.replace(/^ +/, \"\");\n"
"\n"
" var code = searchValue.toLowerCase().charCodeAt(0);\n"
" var hexCode;\n"
" if (code<16) \n"
" var idxChar = searchValue.substr(0, 1).toLowerCase();\n"
"    if ( 0xD800 <= code && code <= 0xDBFF && searchValue.length > 1) // surrogate pair\n"
" {\n"
" hexCode=\"0\"+code.toString(16);\n"
" }\n"
" else \n"
" {\n"
" hexCode=code.toString(16);\n"
" idxChar = searchValue.substr(0, 2);\n"
" }\n"
"\n"
" var resultsPage;\n"
" var resultsPageWithSearch;\n"
" var hasResultsPage;\n"
"\n"
" if (indexSectionsWithContent[this.searchIndex].charAt(code) == '1')\n"
" var idx = indexSectionsWithContent[this.searchIndex].indexOf(idxChar);\n"
" if (idx!=-1)\n"
" {\n"
" var hexCode=idx.toString(16);\n"
" resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html';\n"
" resultsPageWithSearch = resultsPage+'?'+escape(searchValue);\n"
" hasResultsPage = true;\n"
......
......@@ -587,8 +587,6 @@ static const char search_script[]=
#include "search_js.h"
;
#define MEMBER_INDEX_ENTRIES 256
#define SEARCH_INDEX_ALL 0
#define SEARCH_INDEX_CLASSES 1
#define SEARCH_INDEX_NAMESPACES 2
......@@ -606,21 +604,31 @@ static const char search_script[]=
#define SEARCH_INDEX_PAGES 14
#define NUM_SEARCH_INDICES 15
class SearchIndexList : public SDict< QList<Definition> >
/** @brief A list of Definition objects that remembers the letter code it
 *  was constructed with.
 */
class SearchDefinitionList : public QList<Definition>
{
  private:
    uint m_letter;

  public:
    /** Stores \a letter so it can be queried later via letter(). */
    SearchDefinitionList(uint letter) : m_letter(letter)
    {
    }

    /** Returns the letter code that was passed to the constructor. */
    uint letter() const
    {
      return m_letter;
    }
};
class SearchIndexList : public SDict< SearchDefinitionList >
{
public:
SearchIndexList(int size=17) : SDict< QList<Definition> >(size,FALSE)
typedef Definition ElementType;
SearchIndexList(uint letter) : SDict<SearchDefinitionList>(17,FALSE), m_letter(letter)
{
setAutoDelete(TRUE);
}
~SearchIndexList() {}
void append(Definition *d)
{
QList<Definition> *l = find(d->name());
SearchDefinitionList *l = find(d->name());
if (l==0)
{
l=new QList<Definition>;
SDict< QList<Definition> >::append(d->name(),l);
l=new SearchDefinitionList(m_letter);
SDict<SearchDefinitionList>::append(d->name(),l);
}
l->append(d);
}
......@@ -632,10 +640,13 @@ class SearchIndexList : public SDict< QList<Definition> >
QCString n2 = md2->first()->localName();
return qstricmp(n1.data(),n2.data());
}
uint letter() const { return m_letter; }
private:
uint m_letter;
};
static void addMemberToSearchIndex(
SearchIndexList symbols[NUM_SEARCH_INDICES][MEMBER_INDEX_ENTRIES],
LetterToIndexMap<SearchIndexList> symbols[NUM_SEARCH_INDICES],
int symbolCount[NUM_SEARCH_INDICES],
MemberDef *md)
{
......@@ -653,58 +664,57 @@ static void addMemberToSearchIndex(
)
{
QCString n = md->name();
uchar charCode = (uchar)n.at(0);
uint letter = charCode<128 ? tolower(charCode) : charCode;
if (!n.isEmpty())
{
uint letter = getUtf8CodeToLower(n,0);
bool isFriendToHide = hideFriendCompounds &&
(QCString(md->typeString())=="friend class" ||
QCString(md->typeString())=="friend struct" ||
QCString(md->typeString())=="friend union");
if (!(md->isFriend() && isFriendToHide))
{
symbols[SEARCH_INDEX_ALL][letter].append(md);
symbols[SEARCH_INDEX_ALL].append(letter,md);
symbolCount[SEARCH_INDEX_ALL]++;
}
if (md->isFunction() || md->isSlot() || md->isSignal())
{
symbols[SEARCH_INDEX_FUNCTIONS][letter].append(md);
symbols[SEARCH_INDEX_FUNCTIONS].append(letter,md);
symbolCount[SEARCH_INDEX_FUNCTIONS]++;
}
else if (md->isVariable())
{
symbols[SEARCH_INDEX_VARIABLES][letter].append(md);
symbols[SEARCH_INDEX_VARIABLES].append(letter,md);
symbolCount[SEARCH_INDEX_VARIABLES]++;
}
else if (md->isTypedef())
{
symbols[SEARCH_INDEX_TYPEDEFS][letter].append(md);
symbols[SEARCH_INDEX_TYPEDEFS].append(letter,md);
symbolCount[SEARCH_INDEX_TYPEDEFS]++;
}
else if (md->isEnumerate())
{
symbols[SEARCH_INDEX_ENUMS][letter].append(md);
symbols[SEARCH_INDEX_ENUMS].append(letter,md);
symbolCount[SEARCH_INDEX_ENUMS]++;
}
else if (md->isEnumValue())
{
symbols[SEARCH_INDEX_ENUMVALUES][letter].append(md);
symbols[SEARCH_INDEX_ENUMVALUES].append(letter,md);
symbolCount[SEARCH_INDEX_ENUMVALUES]++;
}
else if (md->isProperty())
{
symbols[SEARCH_INDEX_PROPERTIES][letter].append(md);
symbols[SEARCH_INDEX_PROPERTIES].append(letter,md);
symbolCount[SEARCH_INDEX_PROPERTIES]++;
}
else if (md->isEvent())
{
symbols[SEARCH_INDEX_EVENTS][letter].append(md);
symbols[SEARCH_INDEX_EVENTS].append(letter,md);
symbolCount[SEARCH_INDEX_EVENTS]++;
}
else if (md->isRelated() || md->isForeign() ||
(md->isFriend() && !isFriendToHide))
{
symbols[SEARCH_INDEX_RELATED][letter].append(md);
symbols[SEARCH_INDEX_RELATED].append(letter,md);
symbolCount[SEARCH_INDEX_RELATED]++;
}
}
......@@ -716,47 +726,48 @@ static void addMemberToSearchIndex(
)
{
QCString n = md->name();
uchar charCode = (uchar)n.at(0);
uint letter = charCode<128 ? tolower(charCode) : charCode;
if (!n.isEmpty())
{
symbols[SEARCH_INDEX_ALL][letter].append(md);
uint letter = getUtf8CodeToLower(n,0);
symbols[SEARCH_INDEX_ALL].append(letter,md);
symbolCount[SEARCH_INDEX_ALL]++;
if (md->isFunction())
{
symbols[SEARCH_INDEX_FUNCTIONS][letter].append(md);
symbols[SEARCH_INDEX_FUNCTIONS].append(letter,md);
symbolCount[SEARCH_INDEX_FUNCTIONS]++;
}
else if (md->isVariable())
{
symbols[SEARCH_INDEX_VARIABLES][letter].append(md);
symbols[SEARCH_INDEX_VARIABLES].append(letter,md);
symbolCount[SEARCH_INDEX_VARIABLES]++;
}
else if (md->isTypedef())
{
symbols[SEARCH_INDEX_TYPEDEFS][letter].append(md);
symbols[SEARCH_INDEX_TYPEDEFS].append(letter,md);
symbolCount[SEARCH_INDEX_TYPEDEFS]++;
}
else if (md->isEnumerate())
{
symbols[SEARCH_INDEX_ENUMS][letter].append(md);
symbols[SEARCH_INDEX_ENUMS].append(letter,md);
symbolCount[SEARCH_INDEX_ENUMS]++;
}
else if (md->isEnumValue())
{
symbols[SEARCH_INDEX_ENUMVALUES][letter].append(md);
symbols[SEARCH_INDEX_ENUMVALUES].append(letter,md);
symbolCount[SEARCH_INDEX_ENUMVALUES]++;
}
else if (md->isDefine())
{
symbols[SEARCH_INDEX_DEFINES][letter].append(md);
symbols[SEARCH_INDEX_DEFINES].append(letter,md);
symbolCount[SEARCH_INDEX_DEFINES]++;
}
}
}
}
// see also function convertToId() in search.js, which should match in
// behaviour
static QCString searchId(const QCString &s)
{
int c;
......@@ -765,11 +776,15 @@ static QCString searchId(const QCString &s)
for (i=0;i<s.length();i++)
{
c=s.at(i);
if ((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))
if (c>0x7f || c<0) // part of multibyte character
{
result+=(char)c;
}
else if (isalnum(c)) // simply alpha numerical character
{
result+=(char)tolower(c);
}
else
else // other 'unprintable' characters
{
char val[4];
sprintf(val,"_%02x",(uchar)c);
......@@ -780,7 +795,7 @@ static QCString searchId(const QCString &s)
}
static int g_searchIndexCount[NUM_SEARCH_INDICES];
static SearchIndexList g_searchIndexSymbols[NUM_SEARCH_INDICES][MEMBER_INDEX_ENTRIES];
static LetterToIndexMap<SearchIndexList> g_searchIndexSymbols[NUM_SEARCH_INDICES];
static const char *g_searchIndexName[NUM_SEARCH_INDICES] =
{
"all",
......@@ -834,12 +849,11 @@ void writeJavascriptSearchIndex()
ClassDef *cd;
for (;(cd=cli.current());++cli)
{
uchar charCode = (uchar)cd->localName().at(0);
uint letter = charCode<128 ? tolower(charCode) : charCode;
uint letter = getUtf8CodeToLower(cd->localName(),0);
if (cd->isLinkable() && isId(letter))
{
g_searchIndexSymbols[SEARCH_INDEX_ALL][letter].append(cd);
g_searchIndexSymbols[SEARCH_INDEX_CLASSES][letter].append(cd);
g_searchIndexSymbols[SEARCH_INDEX_ALL].append(letter,cd);
g_searchIndexSymbols[SEARCH_INDEX_CLASSES].append(letter,cd);
g_searchIndexCount[SEARCH_INDEX_ALL]++;
g_searchIndexCount[SEARCH_INDEX_CLASSES]++;
}
......@@ -850,12 +864,11 @@ void writeJavascriptSearchIndex()
NamespaceDef *nd;
for (;(nd=nli.current());++nli)
{
uchar charCode = (uchar)nd->name().at(0);
uint letter = charCode<128 ? tolower(charCode) : charCode;
uint letter = getUtf8CodeToLower(nd->name(),0);
if (nd->isLinkable() && isId(letter))
{
g_searchIndexSymbols[SEARCH_INDEX_ALL][letter].append(nd);
g_searchIndexSymbols[SEARCH_INDEX_NAMESPACES][letter].append(nd);
g_searchIndexSymbols[SEARCH_INDEX_ALL].append(letter,nd);
g_searchIndexSymbols[SEARCH_INDEX_NAMESPACES].append(letter,nd);
g_searchIndexCount[SEARCH_INDEX_ALL]++;
g_searchIndexCount[SEARCH_INDEX_NAMESPACES]++;
}
......@@ -870,12 +883,11 @@ void writeJavascriptSearchIndex()
FileDef *fd;
for (;(fd=fni.current());++fni)
{
uchar charCode = (uchar)fd->name().at(0);
uint letter = charCode<128 ? tolower(charCode) : charCode;
uint letter = getUtf8CodeToLower(fd->name(),0);
if (fd->isLinkable() && isId(letter))
{
g_searchIndexSymbols[SEARCH_INDEX_ALL][letter].append(fd);
g_searchIndexSymbols[SEARCH_INDEX_FILES][letter].append(fd);
g_searchIndexSymbols[SEARCH_INDEX_ALL].append(letter,fd);
g_searchIndexSymbols[SEARCH_INDEX_FILES].append(letter,fd);
g_searchIndexCount[SEARCH_INDEX_ALL]++;
g_searchIndexCount[SEARCH_INDEX_FILES]++;
}
......@@ -930,8 +942,8 @@ void writeJavascriptSearchIndex()
uint letter = charCode<128 ? tolower(charCode) : charCode;
if (isId(letter))
{
g_searchIndexSymbols[SEARCH_INDEX_ALL][letter].append(gd);
g_searchIndexSymbols[SEARCH_INDEX_GROUPS][letter].append(gd);
g_searchIndexSymbols[SEARCH_INDEX_ALL].append(letter,gd);
g_searchIndexSymbols[SEARCH_INDEX_GROUPS].append(letter,gd);
g_searchIndexCount[SEARCH_INDEX_ALL]++;
g_searchIndexCount[SEARCH_INDEX_GROUPS]++;
}
......@@ -953,8 +965,8 @@ void writeJavascriptSearchIndex()
uint letter = charCode<128 ? tolower(charCode) : charCode;
if (isId(letter))
{
g_searchIndexSymbols[SEARCH_INDEX_ALL][letter].append(pd);
g_searchIndexSymbols[SEARCH_INDEX_PAGES][letter].append(pd);
g_searchIndexSymbols[SEARCH_INDEX_ALL].append(letter,pd);
g_searchIndexSymbols[SEARCH_INDEX_PAGES].append(letter,pd);
g_searchIndexCount[SEARCH_INDEX_ALL]++;
g_searchIndexCount[SEARCH_INDEX_PAGES]++;
}
......@@ -970,8 +982,8 @@ void writeJavascriptSearchIndex()
uint letter = charCode<128 ? tolower(charCode) : charCode;
if (isId(letter))
{
g_searchIndexSymbols[SEARCH_INDEX_ALL][letter].append(Doxygen::mainPage);
g_searchIndexSymbols[SEARCH_INDEX_PAGES][letter].append(Doxygen::mainPage);
g_searchIndexSymbols[SEARCH_INDEX_ALL].append(letter,Doxygen::mainPage);
g_searchIndexSymbols[SEARCH_INDEX_PAGES].append(letter,Doxygen::mainPage);
g_searchIndexCount[SEARCH_INDEX_ALL]++;
g_searchIndexCount[SEARCH_INDEX_PAGES]++;
}
......@@ -979,29 +991,29 @@ void writeJavascriptSearchIndex()
}
// sort all lists
int i,p;
int i;
for (i=0;i<NUM_SEARCH_INDICES;i++)
{
for (p=0;p<MEMBER_INDEX_ENTRIES;p++)
{
if (g_searchIndexSymbols[i][p].count()>0)
SIntDict<SearchIndexList>::Iterator it(g_searchIndexSymbols[i]);
SearchIndexList *sl;
for (it.toFirst();(sl=it.current());++it)
{
g_searchIndexSymbols[i][p].sort();
}
sl->sort();
}
}
// write index files
QCString searchDirName = Config_getString("HTML_OUTPUT")+"/search";
for (i=0;i<NUM_SEARCH_INDICES;i++)
{
for (p=0;p<MEMBER_INDEX_ENTRIES;p++)
for (i=0;i<NUM_SEARCH_INDICES;i++) // for each index
{
if (g_searchIndexSymbols[i][p].count()>0)
SIntDict<SearchIndexList>::Iterator it(g_searchIndexSymbols[i]);
SearchIndexList *sl;
int p=0;
for (it.toFirst();(sl=it.current());++it,++p) // for each letter
{
QCString baseName;
baseName.sprintf("%s_%02x",g_searchIndexName[i],p);
baseName.sprintf("%s_%x",g_searchIndexName[i],p);
QCString fileName = searchDirName + "/"+baseName+".html";
QCString dataFileName = searchDirName + "/"+baseName+".js";
......@@ -1060,8 +1072,8 @@ void writeJavascriptSearchIndex()
ti << "[" << endl;
bool firstEntry=TRUE;
SDict<QList<Definition> >::Iterator li(g_searchIndexSymbols[i][p]);
QList<Definition> *dl;
SDict<SearchDefinitionList>::Iterator li(*sl);
SearchDefinitionList *dl;
int itemCount=0;
for (li.toFirst();(dl=li.current());++li)
{
......@@ -1253,7 +1265,6 @@ void writeJavascriptSearchIndex()
}
}
}
}
{
QFile f(searchDirName+"/search.js");
......@@ -1275,9 +1286,12 @@ void writeJavascriptSearchIndex()
{
if (!first) t << "," << endl;
t << " " << j << ": \"";
for (p=0;p<MEMBER_INDEX_ENTRIES;p++)
SIntDict<SearchIndexList>::Iterator it(g_searchIndexSymbols[i]);
SearchIndexList *sl;
for (it.toFirst();(sl=it.current());++it) // for each letter
{
t << (g_searchIndexSymbols[i][p].count()>0 ? "1" : "0");
t << QString( QChar( sl->letter() ) ).utf8();
}
t << "\"";
first=FALSE;
......
......@@ -25,6 +25,7 @@
#include <qlist.h>
#include <ctype.h>
#include "types.h"
#include "sortdict.h"
//--------------------------------------------------------------------
......@@ -87,6 +88,33 @@ class TextGeneratorOLImpl : public TextGeneratorIntf
//--------------------------------------------------------------------
/** @brief Dictionary keyed on a unicode character code whose values are
 *  lists (of type T) that collect T::ElementType items.
 *
 *  T has to offer a constructor taking the letter code, a letter()
 *  accessor and an append() method accepting a T::ElementType pointer.
 */
template<class T>
class LetterToIndexMap : public SIntDict<T>
{
  public:
    /** Creates an empty map and switches on auto-delete in the SIntDict base. */
    LetterToIndexMap()
    {
      SIntDict<T>::setAutoDelete(TRUE);
    }

    /** Compares two stored lists by letter code; the result is the signed
     *  difference between the letter of \a item1 and the letter of \a item2.
     */
    int compareItems(QCollection::Item item1, QCollection::Item item2)
    {
      int firstLetter  = (int)((T *)item1)->letter();
      int secondLetter = (int)((T *)item2)->letter();
      return firstLetter-secondLetter;
    }

    /** Adds \a elem to the list registered under \a letter. When no list
     *  exists yet for that letter, a new one is created and inserted with
     *  inSort() before the element is appended.
     */
    void append(uint letter,typename T::ElementType *elem)
    {
      const int key = (int)letter;
      T *bucket = SIntDict<T>::find(key);
      if (!bucket)
      {
        bucket = new T(letter);
        SIntDict<T>::inSort(key,bucket);
      }
      bucket->append(elem);
    }
};
//--------------------------------------------------------------------
QCString langToString(SrcLangExt lang);
QCString getLanguageSpecificSeparator(SrcLangExt lang,bool classScope=FALSE);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment