Bug 705910 - Indexing and searching cannot treat non ASCII identifiers

e193c540 · Dimitri van Heesch · 5b6730ab · e193c540 · e193c540 · e193c540
Commit e193c540 authored Sep 14, 2013 by Dimitri van Heesch
Showing with 551 additions and 523 deletions

Doxyfile qtools/Doxyfile +1 -1

index.cpp src/index.cpp +404 -519

sortdict.h src/sortdict.h +73 -3

util.cpp src/util.cpp +69 -0

util.h src/util.h +4 -0

No files found.
--- a/qtools/Doxyfile
+++ b/qtools/Doxyfile
@@ -174,7 +174,7 @@ QHP_SECT_FILTER_ATTRS  =
 QHG_LOCATION           =
 GENERATE_ECLIPSEHELP   = YES
 ECLIPSE_DOC_ID         = org.doxygen.qtools
-DISABLE_INDEX          = YES
+DISABLE_INDEX          = NO
 GENERATE_TREEVIEW      = YES
 ENUM_VALUES_PER_LINE   = 4
 TREEVIEW_WIDTH         = 250

--- a/src/index.cpp
+++ b/src/index.cpp
--- a/src/sortdict.h
+++ b/src/sortdict.h
@@ -108,7 +108,7 @@ class SDict
     *  \param caseSensitive indicated whether the keys should be sorted
     *         in a case sensitive way.
     */
-    SDict(int size,bool caseSensitive=TRUE) : m_sizeIndex(0)
+    SDict(int size=17,bool caseSensitive=TRUE) : m_sizeIndex(0)
    {
      m_list = new SList<T>(this);
 #if AUTORESIZE
@@ -454,7 +454,7 @@ class SIntDict
     *  \param size The size of the dictionary. Should be a prime number for
     *              best distribution of elements.
     */
-    SIntDict(int size) : m_sizeIndex(0)
+    SIntDict(int size=17) : m_sizeIndex(0)
    {
      m_list = new SIntList<T>(this);
 #if AUTORESIZE
@@ -636,7 +636,7 @@ class SIntDict
        {
          return m_li->current();
        }
        /*! Moves the iterator to the next element.
         *  \return the new "current" element, or zero if the iterator was
         *          already pointing at the last element.
@@ -659,6 +659,76 @@ class SIntDict
        QListIterator<T> *m_li;
    };
+    class IteratorDict;         // forward declaration, required by the friend declaration below
+    friend class IteratorDict;  // lets the iterator reach SIntDict's m_dict member
+    /*! Simple iterator for SIntDict. It iterates over the dictionary elements
+     *  in an unsorted way, but does provide information about the element's key.
+     */
+    class IteratorDict
+    {
+      public:
+        /*! Create an iterator given the dictionary. */
+        IteratorDict(const SIntDict<T> &dict)
+        {
+          m_di = new QIntDictIterator<T>(*dict.m_dict);
+        }
+        /*! Destroys the iterator (the dictionary itself is left untouched). */
+        virtual ~IteratorDict()
+        {
+          delete m_di;
+        }
+        /*! Set the iterator to the first element in the dictionary.
+         *  \return The first element, or zero if the dictionary was empty.
+         */
+        T *toFirst() const
+        {
+          return m_di->toFirst();
+        }
+        /*! Set the iterator to the last element in the dictionary.
+         *  \return The last element, or zero if the dictionary was empty.
+         */
+        T *toLast() const
+        {
+          return m_di->toLast();
+        }
+        /*! Returns the element the iterator currently points at. */
+        T *current() const
+        {
+          return m_di->current();
+        }
+        /*! Returns the integer key of the current element. */
+        int currentKey() const
+        {
+          return m_di->currentKey();
+        }
+        /*! Moves the iterator to the next element.
+         *  \return the new "current" element, or zero if the iterator was
+         *          already pointing at the last element.
+         */
+        T *operator++()
+        {
+          return m_di->operator++();
+        }
+        /*! Moves the iterator to the previous element.
+         *  \return the new "current" element, or zero if the iterator was
+         *          already pointing at the first element.
+         */
+        T *operator--()
+        {
+          return m_di->operator--();
+        }
+      private:
+        // Must be the integer-keyed iterator: the constructor allocates a
+        // QIntDictIterator<T>, which is not convertible to QDictIterator<T>*.
+        QIntDictIterator<T> *m_di;
+    };
 };
 #endif
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -7919,3 +7919,72 @@ void addDocCrossReference(MemberDef *src,MemberDef *dst)
  }
 }
+//--------------------------------------------------------------------------------------
+/*! @brief Get one unicode character as an unsigned integer from utf-8 string
+ *
+ * Decodes the UTF-8 sequence whose lead byte is located at byte position
+ * \a idx. A byte that cannot start a multi-byte sequence (value below 0xC2,
+ * or 0xF8 and above) is returned unchanged. Continuation bytes are not
+ * validated: only their low 6 bits are merged into the result.
+ *
+ * @param s utf-8 encoded string
+ * @param idx byte position of given string \a s.
+ * @return the decoded code point, or 0 if \a idx lies outside the string or
+ *         the sequence is cut off by the end of the string.
+ * @see getUtf8CodeToLower()
+ * @see getUtf8CodeToUpper()
+ */
+uint getUtf8Code( const QCString& s, int idx )
+{
+  const int length = s.length();
+  // Reject negative positions as well, so s.at() is never called out of range.
+  if (idx < 0 || idx >= length) { return 0; }
+  const uint c0 = (uchar)s.at(idx);
+  if ( c0 < 0xC2 || c0 >= 0xF8 ) // 1 byte character (or a byte that is no valid lead byte)
+  {
+    return c0;
+  }
+  if (idx+1 >= length) { return 0; } // truncated sequence
+  const uint c1 = ((uchar)s.at(idx+1)) & 0x3f;
+  if ( c0 < 0xE0 ) // 2 byte character
+  {
+    return ((c0 & 0x1f) << 6) | c1;
+  }
+  if (idx+2 >= length) { return 0; } // truncated sequence
+  const uint c2 = ((uchar)s.at(idx+2)) & 0x3f;
+  if ( c0 < 0xF0 ) // 3 byte character
+  {
+    return ((c0 & 0x0f) << 12) | (c1 << 6) | c2;
+  }
+  if (idx+3 >= length) { return 0; } // truncated sequence
+  // 4 byte character
+  const uint c3 = ((uchar)s.at(idx+3)) & 0x3f;
+  return ((c0 & 0x07) << 18) | (c1 << 12) | (c2 << 6) | c3;
+}
+/*! @brief Returns one unicode character as an unsigned integer
+ *  from utf-8 string, making the character lower case if it was upper case.
+ *
+ * Only code points below 0x7f are passed through tolower(); every other
+ * code point is returned exactly as decoded.
+ *
+ * @param s utf-8 encoded string
+ * @param idx byte position of given string \a s.
+ * @return the code point as produced by getUtf8Code(), with 'A'-'Z'
+ *         mapped to lower case
+ * @see getUtf8Code()
+ */
+uint getUtf8CodeToLower( const QCString& s, int idx )
+{
+  uint code = getUtf8Code( s, idx );
+  if ( code < 0x7f ) // case folding is restricted to the ASCII range
+  {
+    code = tolower( code );
+  }
+  return code;
+}
+/*! @brief Returns one unicode character as an unsigned integer
+ *  from utf-8 string, making the character upper case if it was lower case.
+ *
+ * Only code points below 0x7f are passed through toupper(); every other
+ * code point is returned exactly as decoded.
+ *
+ * @param s utf-8 encoded string
+ * @param idx byte position of given string \a s.
+ * @return the code point as produced by getUtf8Code(), with 'a'-'z'
+ *         mapped to upper case
+ * @see getUtf8Code()
+ */
+uint getUtf8CodeToUpper( const QCString& s, int idx )
+{
+  uint code = getUtf8Code( s, idx );
+  if ( code < 0x7f ) // case folding is restricted to the ASCII range
+  {
+    code = toupper( code );
+  }
+  return code;
+}
+//--------------------------------------------------------------------------------------
--- a/src/util.h
+++ b/src/util.h
@@ -411,5 +411,9 @@ bool fileVisibleInIndex(FileDef *fd,bool &genSourceFile);
 void addDocCrossReference(MemberDef *src,MemberDef *dst);
+uint getUtf8Code( const QCString& s, int idx );
+uint getUtf8CodeToLower( const QCString& s, int idx );
+uint getUtf8CodeToUpper( const QCString& s, int idx );
 #endif