Commit bca6baee authored by Dimitri van Heesch

Bug 705910 - Indexing and searching cannot treat non ASCII identifiers

parent f6bc941e
...@@ -174,7 +174,7 @@ QHP_SECT_FILTER_ATTRS = ...@@ -174,7 +174,7 @@ QHP_SECT_FILTER_ATTRS =
QHG_LOCATION = QHG_LOCATION =
GENERATE_ECLIPSEHELP = YES GENERATE_ECLIPSEHELP = YES
ECLIPSE_DOC_ID = org.doxygen.qtools ECLIPSE_DOC_ID = org.doxygen.qtools
DISABLE_INDEX = YES DISABLE_INDEX = NO
GENERATE_TREEVIEW = YES GENERATE_TREEVIEW = YES
ENUM_VALUES_PER_LINE = 4 ENUM_VALUES_PER_LINE = 4
TREEVIEW_WIDTH = 250 TREEVIEW_WIDTH = 250
......
This diff is collapsed.
...@@ -5,7 +5,7 @@ function convertToId(search) ...@@ -5,7 +5,7 @@ function convertToId(search)
{ {
var c = search.charAt(i); var c = search.charAt(i);
var cn = c.charCodeAt(0); var cn = c.charCodeAt(0);
if (c.match(/[a-z0-9]/)) if (c.match(/[a-z0-9\u0080-\uFFFF]/))
{ {
result+=c; result+=c;
} }
...@@ -310,22 +310,20 @@ function SearchBox(name, resultsPath, inFrame, label) ...@@ -310,22 +310,20 @@ function SearchBox(name, resultsPath, inFrame, label)
var searchValue = this.DOMSearchField().value.replace(/^ +/, ""); var searchValue = this.DOMSearchField().value.replace(/^ +/, "");
var code = searchValue.toLowerCase().charCodeAt(0); var code = searchValue.toLowerCase().charCodeAt(0);
var hexCode; var idxChar = searchValue.substr(0, 1).toLowerCase();
if (code<16) if ( 0xD800 <= code && code <= 0xDBFF && searchValue > 1) // surrogate pair
{ {
hexCode="0"+code.toString(16); idxChar = searchValue.substr(0, 2);
}
else
{
hexCode=code.toString(16);
} }
var resultsPage; var resultsPage;
var resultsPageWithSearch; var resultsPageWithSearch;
var hasResultsPage; var hasResultsPage;
if (indexSectionsWithContent[this.searchIndex].charAt(code) == '1') var idx = indexSectionsWithContent[this.searchIndex].indexOf(idxChar);
if (idx!=-1)
{ {
var hexCode=idx.toString(16);
resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html'; resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html';
resultsPageWithSearch = resultsPage+'?'+escape(searchValue); resultsPageWithSearch = resultsPage+'?'+escape(searchValue);
hasResultsPage = true; hasResultsPage = true;
......
...@@ -358,7 +358,7 @@ function main() ...@@ -358,7 +358,7 @@ function main()
$sorted = run_query($query); $sorted = run_query($query);
// Now output the HTML stuff... // Now output the HTML stuff...
// End the HTML form // End the HTML form
end_form(preg_replace("/[^a-zA-Z0-9\-\_\.]/i", " ", $query )); end_form(preg_replace("/[^a-zA-Z0-9\-\_\.\x80-\xFF]/i", " ", $query ));
// report results to the user // report results to the user
report_results($sorted); report_results($sorted);
end_page(); end_page();
......
...@@ -358,7 +358,7 @@ ...@@ -358,7 +358,7 @@
" $sorted = run_query($query);\n" " $sorted = run_query($query);\n"
" // Now output the HTML stuff...\n" " // Now output the HTML stuff...\n"
" // End the HTML form\n" " // End the HTML form\n"
" end_form(preg_replace(\"/[^a-zA-Z0-9\\-\\_\\.]/i\", \" \", $query ));\n" " end_form(preg_replace(\"/[^a-zA-Z0-9\\-\\_\\.\\x80-\\xFF]/i\", \" \", $query ));\n"
" // report results to the user\n" " // report results to the user\n"
" report_results($sorted);\n" " report_results($sorted);\n"
" end_page();\n" " end_page();\n"
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
" {\n" " {\n"
" var c = search.charAt(i);\n" " var c = search.charAt(i);\n"
" var cn = c.charCodeAt(0);\n" " var cn = c.charCodeAt(0);\n"
" if (c.match(/[a-z0-9]/))\n" " if (c.match(/[a-z0-9\\u0080-\\uFFFF]/))\n"
" {\n" " {\n"
" result+=c;\n" " result+=c;\n"
" }\n" " }\n"
...@@ -310,22 +310,20 @@ ...@@ -310,22 +310,20 @@
" var searchValue = this.DOMSearchField().value.replace(/^ +/, \"\");\n" " var searchValue = this.DOMSearchField().value.replace(/^ +/, \"\");\n"
"\n" "\n"
" var code = searchValue.toLowerCase().charCodeAt(0);\n" " var code = searchValue.toLowerCase().charCodeAt(0);\n"
" var hexCode;\n" " var idxChar = searchValue.substr(0, 1).toLowerCase();\n"
" if (code<16) \n" " if ( 0xD800 <= code && code <= 0xDBFF && searchValue > 1) // surrogate pair\n"
" {\n" " {\n"
" hexCode=\"0\"+code.toString(16);\n" " idxChar = searchValue.substr(0, 2);\n"
" }\n"
" else \n"
" {\n"
" hexCode=code.toString(16);\n"
" }\n" " }\n"
"\n" "\n"
" var resultsPage;\n" " var resultsPage;\n"
" var resultsPageWithSearch;\n" " var resultsPageWithSearch;\n"
" var hasResultsPage;\n" " var hasResultsPage;\n"
"\n" "\n"
" if (indexSectionsWithContent[this.searchIndex].charAt(code) == '1')\n" " var idx = indexSectionsWithContent[this.searchIndex].indexOf(idxChar);\n"
" if (idx!=-1)\n"
" {\n" " {\n"
" var hexCode=idx.toString(16);\n"
" resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html';\n" " resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html';\n"
" resultsPageWithSearch = resultsPage+'?'+escape(searchValue);\n" " resultsPageWithSearch = resultsPage+'?'+escape(searchValue);\n"
" hasResultsPage = true;\n" " hasResultsPage = true;\n"
......
This diff is collapsed.
...@@ -108,7 +108,7 @@ class SDict ...@@ -108,7 +108,7 @@ class SDict
* \param caseSensitive indicated whether the keys should be sorted * \param caseSensitive indicated whether the keys should be sorted
* in a case sensitive way. * in a case sensitive way.
*/ */
SDict(int size,bool caseSensitive=TRUE) : m_sizeIndex(0) SDict(int size=17,bool caseSensitive=TRUE) : m_sizeIndex(0)
{ {
m_list = new SList<T>(this); m_list = new SList<T>(this);
#if AUTORESIZE #if AUTORESIZE
...@@ -454,7 +454,7 @@ class SIntDict ...@@ -454,7 +454,7 @@ class SIntDict
* \param size The size of the dictionary. Should be a prime number for * \param size The size of the dictionary. Should be a prime number for
* best distribution of elements. * best distribution of elements.
*/ */
SIntDict(int size) : m_sizeIndex(0) SIntDict(int size=17) : m_sizeIndex(0)
{ {
m_list = new SIntList<T>(this); m_list = new SIntList<T>(this);
#if AUTORESIZE #if AUTORESIZE
...@@ -636,7 +636,7 @@ class SIntDict ...@@ -636,7 +636,7 @@ class SIntDict
{ {
return m_li->current(); return m_li->current();
} }
/*! Moves the iterator to the next element. /*! Moves the iterator to the next element.
* \return the new "current" element, or zero if the iterator was * \return the new "current" element, or zero if the iterator was
* already pointing at the last element. * already pointing at the last element.
...@@ -659,6 +659,76 @@ class SIntDict ...@@ -659,6 +659,76 @@ class SIntDict
QListIterator<T> *m_li; QListIterator<T> *m_li;
}; };
class IteratorDict; // first forward declare
friend class IteratorDict; // then make it a friend
/*! Simple iterator for SIntDict. It iterates over the dictionary elements
 *  in an unsorted way, but does provide information about the element's key.
 */
class IteratorDict
{
  public:
    /*! Create an iterator given the dictionary.
     *  \param dict the integer-keyed dictionary to iterate over.
     */
    IteratorDict(const SIntDict<T> &dict)
    {
      m_di = new QIntDictIterator<T>(*dict.m_dict);
    }

    /*! Destroys the iterator (the dictionary itself is left untouched). */
    virtual ~IteratorDict()
    {
      delete m_di;
    }

    /*! Set the iterator to the first element in the dictionary.
     *  \return The first element, or zero if the dictionary was empty.
     */
    T *toFirst() const
    {
      return m_di->toFirst();
    }

    /*! Set the iterator to the last element in the dictionary.
     *  \return The last element, or zero if the dictionary was empty.
     */
    T *toLast() const
    {
      return m_di->toLast();
    }

    /*! Returns the element the iterator currently points at. */
    T *current() const
    {
      return m_di->current();
    }

    /*! Returns the integer key of the element the iterator currently
     *  points at.
     */
    int currentKey() const
    {
      return m_di->currentKey();
    }

    /*! Moves the iterator to the next element.
     *  \return the new "current" element, or zero if the iterator was
     *          already pointing at the last element.
     */
    T *operator++()
    {
      return m_di->operator++();
    }

    /*! Moves the iterator to the previous element.
     *  \return the new "current" element, or zero if the iterator was
     *          already pointing at the first element.
     */
    T *operator--()
    {
      return m_di->operator--();
    }

  private:
    // Must be the integer-keyed iterator: it is created from a
    // QIntDictIterator in the constructor and currentKey() yields an int.
    QIntDictIterator<T> *m_di;
};
}; };
#endif #endif
...@@ -7919,3 +7919,72 @@ void addDocCrossReference(MemberDef *src,MemberDef *dst) ...@@ -7919,3 +7919,72 @@ void addDocCrossReference(MemberDef *src,MemberDef *dst)
} }
} }
//--------------------------------------------------------------------------------------
/*! @brief Decodes the UTF-8 sequence starting at a byte offset into a
 *  single unicode code point.
 *
 *  @param s   utf-8 encoded string
 *  @param idx byte position in \a s where the sequence starts.
 *  @return the decoded code point. 0 is returned when \a idx is at or past
 *          the end of \a s, or when the sequence is cut off by the end of
 *          the string. A first byte below 0xC2 or at/above 0xF8 is returned
 *          unchanged as a one byte value.
 *  @see getUtf8CodeToLower()
 *  @see getUtf8CodeToUpper()
 */
uint getUtf8Code( const QCString& s, int idx )
{
  const int len = s.length();
  if (idx >= len) { return 0; }

  const uint lead = (uchar)s.at(idx);
  if ( lead < 0xC2 || lead >= 0xF8 ) // 1 byte character
  {
    return lead;
  }

  // Derive the number of continuation bytes and the payload bits of the
  // lead byte from the lead byte's range.
  int  trailing;
  uint code;
  if ( lead < 0xE0 )      // 2 byte character
  {
    trailing = 1;
    code     = lead & 0x1f;
  }
  else if ( lead < 0xF0 ) // 3 byte character
  {
    trailing = 2;
    code     = lead & 0x0f;
  }
  else                    // 4 byte character
  {
    trailing = 3;
    code     = lead & 0x07;
  }

  // A sequence that runs past the end of the string yields 0.
  if (idx+trailing >= len) { return 0; }

  // Each continuation byte contributes its low 6 bits.
  for (int k=1; k<=trailing; k++)
  {
    code = (code << 6) | (((uchar)s.at(idx+k)) & 0x3f);
  }
  return code;
}
/*! @brief Returns one unicode character as an unsigned integer
 *  from a utf-8 string, converted to lower case if it is below 0x7f.
 *
 *  @param s   utf-8 encoded string
 *  @param idx byte position in \a s where the character starts.
 *  @return the code point produced by getUtf8Code(); values below 0x7f are
 *          passed through tolower(), all other values are returned as-is.
 *  @see getUtf8Code()
 */
uint getUtf8CodeToLower( const QCString& s, int idx )
{
  const uint code = getUtf8Code( s, idx );
  if (code >= 0x7f)
  {
    // outside the range handled by tolower(): leave untouched
    return code;
  }
  return tolower( code );
}
/*! @brief Returns one unicode character as an unsigned integer
 *  from a utf-8 string, converted to upper case if it is below 0x7f.
 *
 *  @param s   utf-8 encoded string
 *  @param idx byte position in \a s where the character starts.
 *  @return the code point produced by getUtf8Code(); values below 0x7f are
 *          passed through toupper(), all other values are returned as-is.
 *  @see getUtf8Code()
 */
uint getUtf8CodeToUpper( const QCString& s, int idx )
{
  const uint code = getUtf8Code( s, idx );
  if (code >= 0x7f)
  {
    // outside the range handled by toupper(): leave untouched
    return code;
  }
  return toupper( code );
}
//--------------------------------------------------------------------------------------
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <qlist.h> #include <qlist.h>
#include <ctype.h> #include <ctype.h>
#include "types.h" #include "types.h"
#include "sortdict.h"
//-------------------------------------------------------------------- //--------------------------------------------------------------------
...@@ -87,6 +88,33 @@ class TextGeneratorOLImpl : public TextGeneratorIntf ...@@ -87,6 +88,33 @@ class TextGeneratorOLImpl : public TextGeneratorIntf
//-------------------------------------------------------------------- //--------------------------------------------------------------------
/** @brief Maps a unicode character code to a T, where each T collects
 *  T::ElementType items for that letter.
 *
 *  T must be constructible from the letter code and provide letter() and
 *  append(). Auto-delete is switched on in the constructor.
 */
template<class T>
class LetterToIndexMap : public SIntDict<T>
{
  public:
    LetterToIndexMap()
    {
      SIntDict<T>::setAutoDelete(TRUE);
    }

    /** Orders two T items by the numeric value of their letter().
     *  @return the difference of the first and the second item's letter.
     */
    int compareItems(QCollection::Item item1, QCollection::Item item2)
    {
      const int first  = (int)((T *)item1)->letter();
      const int second = (int)((T *)item2)->letter();
      return first-second;
    }

    /** Appends \a elem to the T stored under \a letter; a new T is
     *  created and inserted with inSort() when none is found yet.
     */
    void append(uint letter,typename T::ElementType *elem)
    {
      const int key = (int)letter;
      T *bucket = SIntDict<T>::find(key);
      if (bucket==0) // first element for this letter
      {
        bucket = new T(letter);
        SIntDict<T>::inSort(key,bucket);
      }
      bucket->append(elem);
    }
};
//--------------------------------------------------------------------
QCString langToString(SrcLangExt lang); QCString langToString(SrcLangExt lang);
QCString getLanguageSpecificSeparator(SrcLangExt lang,bool classScope=FALSE); QCString getLanguageSpecificSeparator(SrcLangExt lang,bool classScope=FALSE);
...@@ -411,5 +439,9 @@ bool fileVisibleInIndex(FileDef *fd,bool &genSourceFile); ...@@ -411,5 +439,9 @@ bool fileVisibleInIndex(FileDef *fd,bool &genSourceFile);
void addDocCrossReference(MemberDef *src,MemberDef *dst); void addDocCrossReference(MemberDef *src,MemberDef *dst);
uint getUtf8Code( const QCString& s, int idx );
uint getUtf8CodeToLower( const QCString& s, int idx );
uint getUtf8CodeToUpper( const QCString& s, int idx );
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment