Commit e193c540 authored by Dimitri van Heesch's avatar Dimitri van Heesch

Bug 705910 - Indexing and searching cannot treat non ASCII identifiers

parent 5b6730ab
......@@ -174,7 +174,7 @@ QHP_SECT_FILTER_ATTRS =
QHG_LOCATION =
GENERATE_ECLIPSEHELP = YES
ECLIPSE_DOC_ID = org.doxygen.qtools
DISABLE_INDEX = YES
DISABLE_INDEX = NO
GENERATE_TREEVIEW = YES
ENUM_VALUES_PER_LINE = 4
TREEVIEW_WIDTH = 250
......
This diff is collapsed.
......@@ -108,7 +108,7 @@ class SDict
* \param caseSensitive indicated whether the keys should be sorted
* in a case sensitive way.
*/
SDict(int size,bool caseSensitive=TRUE) : m_sizeIndex(0)
SDict(int size=17,bool caseSensitive=TRUE) : m_sizeIndex(0)
{
m_list = new SList<T>(this);
#if AUTORESIZE
......@@ -454,7 +454,7 @@ class SIntDict
* \param size The size of the dictionary. Should be a prime number for
* best distribution of elements.
*/
SIntDict(int size) : m_sizeIndex(0)
SIntDict(int size=17) : m_sizeIndex(0)
{
m_list = new SIntList<T>(this);
#if AUTORESIZE
......@@ -636,7 +636,7 @@ class SIntDict
{
return m_li->current();
}
/*! Moves the iterator to the next element.
* \return the new "current" element, or zero if the iterator was
* already pointing at the last element.
......@@ -659,6 +659,76 @@ class SIntDict
QListIterator<T> *m_li;
};
class IteratorDict; // first forward declare
friend class IteratorDict; // then make it a friend
/*! Simple iterator for SDict. It iterates over the dictionary elements
* in an unsorted way, but does provide information about the element's key.
*/
class IteratorDict
{
public:
/*! Create an iterator given the dictionary. */
IteratorDict(const SIntDict<T> &dict)
{
m_di = new QIntDictIterator<T>(*dict.m_dict);
}
/*! Destroys the dictionary */
virtual ~IteratorDict()
{
delete m_di;
}
/*! Set the iterator to the first element in the list.
* \return The first compound, or zero if the list was empty.
*/
T *toFirst() const
{
return m_di->toFirst();
}
/*! Set the iterator to the last element in the list.
* \return The first compound, or zero if the list was empty.
*/
T *toLast() const
{
return m_di->toLast();
}
/*! Returns the current compound */
T *current() const
{
return m_di->current();
}
/*! Returns the current key */
int currentKey() const
{
return m_di->currentKey();
}
/*! Moves the iterator to the next element.
* \return the new "current" element, or zero if the iterator was
* already pointing at the last element.
*/
T *operator++()
{
return m_di->operator++();
}
/*! Moves the iterator to the previous element.
* \return the new "current" element, or zero if the iterator was
* already pointing at the first element.
*/
T *operator--()
{
return m_di->operator--();
}
private:
QDictIterator<T> *m_di;
};
};
#endif
......@@ -7919,3 +7919,72 @@ void addDocCrossReference(MemberDef *src,MemberDef *dst)
}
}
//--------------------------------------------------------------------------------------
/*! @brief Get one unicode character as an unsigned integer from utf-8 string
*
* @param s utf-8 encoded string
* @param idx byte position of given string \a s.
* @return the unicode codepoint, 0 - MAX_UNICODE_CODEPOINT
* @see getNextUtf8OrToLower()
* @see getNextUtf8OrToUpper()
*/
uint getUtf8Code( const QCString& s, int idx )
{
const int length = s.length();
if (idx >= length) { return 0; }
const uint c0 = (uchar)s.at(idx);
if ( c0 < 0xC2 || c0 >= 0xF8 ) // 1 byte character
{
return c0;
}
if (idx+1 >= length) { return 0; }
const uint c1 = ((uchar)s.at(idx+1)) & 0x3f;
if ( c0 < 0xE0 ) // 2 byte character
{
return ((c0 & 0x1f) << 6) | c1;
}
if (idx+2 >= length) { return 0; }
const uint c2 = ((uchar)s.at(idx+2)) & 0x3f;
if ( c0 < 0xF0 ) // 3 byte character
{
return ((c0 & 0x0f) << 12) | (c1 << 6) | c2;
}
if (idx+3 >= length) { return 0; }
// 4 byte character
const uint c3 = ((uchar)s.at(idx+3)) & 0x3f;
return ((c0 & 0x07) << 18) | (c1 << 12) | (c2 << 6) | c3;
}
/*! @brief Returns one unicode character as an unsigned integer
* from utf-8 string, making the character lower case if it was upper case.
*
* @param s utf-8 encoded string
* @param idx byte position of given string \a s.
* @return the unicode codepoint, 0 - MAX_UNICODE_CODEPOINT, excludes 'A'-'Z'
* @see getNextUtf8Code()
*/
uint getUtf8CodeToLower( const QCString& s, int idx )
{
const uint v = getUtf8Code( s, idx );
return v < 0x7f ? tolower( v ) : v;
}
/*! @brief Returns one unicode character as ian unsigned interger
* from utf-8 string, making the character upper case if it was lower case.
*
* @param s utf-8 encoded string
* @param idx byte position of given string \a s.
* @return the unicode codepoint, 0 - MAX_UNICODE_CODEPOINT, excludes 'A'-'Z'
* @see getNextUtf8Code()
*/
uint getUtf8CodeToUpper( const QCString& s, int idx )
{
const uint v = getUtf8Code( s, idx );
return v < 0x7f ? toupper( v ) : v;
}
//--------------------------------------------------------------------------------------
......@@ -411,5 +411,9 @@ bool fileVisibleInIndex(FileDef *fd,bool &genSourceFile);
void addDocCrossReference(MemberDef *src,MemberDef *dst);
uint getUtf8Code( const QCString& s, int idx );
uint getUtf8CodeToLower( const QCString& s, int idx );
uint getUtf8CodeToUpper( const QCString& s, int idx );
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment