Commit d5fb6973 authored by Dimitri van Heesch

Bug 705220 - Enables using unicode (non ASCII) to name output files

parent 43461e44
...@@ -276,6 +276,16 @@ Go to the <a href="commands.html">next</a> section or return to the ...@@ -276,6 +276,16 @@ Go to the <a href="commands.html">next</a> section or return to the
Enabling this option can be useful when feeding doxygen a huge amount of source Enabling this option can be useful when feeding doxygen a huge amount of source
files, where putting all generated files in the same directory would otherwise files, where putting all generated files in the same directory would otherwise
cause performance problems for the file system. cause performance problems for the file system.
]]>
</docs>
</option>
<option type='bool' id='ALLOW_UNICODE_NAMES' defval='0'>
<docs>
<![CDATA[
If the \c ALLOW_UNICODE_NAMES tag is set to \c YES,
doxygen will allow non-ASCII characters to appear in the names of generated files.
If set to \c NO, each byte of a non-ASCII character's UTF-8 encoding will be escaped,
for example _xE3_x81_x84 will be used for Unicode U+3044.
]]> ]]>
</docs> </docs>
</option> </option>
......
...@@ -5144,6 +5144,7 @@ bool hasVisibleRoot(BaseClassList *bcl) ...@@ -5144,6 +5144,7 @@ bool hasVisibleRoot(BaseClassList *bcl)
QCString escapeCharsInString(const char *name,bool allowDots,bool allowUnderscore) QCString escapeCharsInString(const char *name,bool allowDots,bool allowUnderscore)
{ {
static bool caseSenseNames = Config_getBool("CASE_SENSE_NAMES"); static bool caseSenseNames = Config_getBool("CASE_SENSE_NAMES");
static bool allowUnicodeNames = Config_getBool("ALLOW_UNICODE_NAMES");
static GrowBuf growBuf; static GrowBuf growBuf;
growBuf.clear(); growBuf.clear();
char c; char c;
...@@ -5179,8 +5180,49 @@ QCString escapeCharsInString(const char *name,bool allowDots,bool allowUnderscor ...@@ -5179,8 +5180,49 @@ QCString escapeCharsInString(const char *name,bool allowDots,bool allowUnderscor
default: default:
if (c<0) if (c<0)
{ {
static char map[] = "0123456789ABCDEF";
char ids[5]; char ids[5];
const unsigned char uc = (unsigned char)c;
bool doEscape = TRUE;
if (allowUnicodeNames && uc <= 0xf7)
{
const char* pt = p;
ids[ 0 ] = c;
int l = 0;
if ((uc&0xE0)==0xC0)
{
l=2; // 11xx.xxxx: >=2 byte character
}
if ((uc&0xF0)==0xE0)
{
l=3; // 111x.xxxx: >=3 byte character
}
if ((uc&0xF8)==0xF0)
{
l=4; // 1111.xxxx: >=4 byte character
}
doEscape = l==0;
for (int m=1; m<l && !doEscape; ++m)
{
unsigned char ct = (unsigned char)*pt;
if (ct==0 || (ct&0xC0)!=0x80) // invalid unicode character
{
doEscape=TRUE;
}
else
{
ids[ m ] = *pt++;
}
}
if ( !doEscape ) // got a valid unicode character
{
ids[ l ] = 0;
growBuf.addStr( ids );
p += l - 1;
}
}
if (doEscape) // not a valid unicode char or escaping needed
{
static char map[] = "0123456789ABCDEF";
unsigned char id = (unsigned char)c; unsigned char id = (unsigned char)c;
ids[0]='_'; ids[0]='_';
ids[1]='x'; ids[1]='x';
...@@ -5189,6 +5231,7 @@ QCString escapeCharsInString(const char *name,bool allowDots,bool allowUnderscor ...@@ -5189,6 +5231,7 @@ QCString escapeCharsInString(const char *name,bool allowDots,bool allowUnderscor
ids[4]=0; ids[4]=0;
growBuf.addStr(ids); growBuf.addStr(ids);
} }
}
else if (caseSenseNames || !isupper(c)) else if (caseSenseNames || !isupper(c))
{ {
growBuf.addChar(c); growBuf.addChar(c);
...@@ -6922,14 +6965,25 @@ const char *writeUtf8Char(FTextStream &t,const char *s) ...@@ -6922,14 +6965,25 @@ const char *writeUtf8Char(FTextStream &t,const char *s)
t << c; t << c;
if (c<0) // multibyte character if (c<0) // multibyte character
{ {
t << *s++; if (((uchar)c&0xE0)==0xC0)
if (((uchar)c&0xE0)==0xE0) {
t << *s++; // 11xx.xxxx: >=2 byte character
}
if (((uchar)c&0xF0)==0xE0)
{ {
t << *s++; // 111x.xxxx: >=3 byte character t << *s++; // 111x.xxxx: >=3 byte character
} }
if (((uchar)c&0xF0)==0xF0) if (((uchar)c&0xF8)==0xF0)
{
t << *s++; // 1111.xxxx: >=4 byte character
}
if (((uchar)c&0xFC)==0xF8)
{ {
t << *s++; // 1111.xxxx: 4 byte character t << *s++; // 1111.1xxx: >=5 byte character
}
if (((uchar)c&0xFE)==0xFC)
{
t << *s++; // 1111.1xxx: 6 byte character
} }
} }
return s; return s;
...@@ -6942,14 +6996,25 @@ int nextUtf8CharPosition(const QCString &utf8Str,int len,int startPos) ...@@ -6942,14 +6996,25 @@ int nextUtf8CharPosition(const QCString &utf8Str,int len,int startPos)
char c = utf8Str[startPos]; char c = utf8Str[startPos];
if (c<0) // multibyte utf-8 character if (c<0) // multibyte utf-8 character
{ {
bytes++; // 1xxx.xxxx: >=2 byte character if (((uchar)c&0xE0)==0xC0)
if (((uchar)c&0xE0)==0xE0) {
bytes++; // 11xx.xxxx: >=2 byte character
}
if (((uchar)c&0xF0)==0xE0)
{ {
bytes++; // 111x.xxxx: >=3 byte character bytes++; // 111x.xxxx: >=3 byte character
} }
if (((uchar)c&0xF0)==0xF0) if (((uchar)c&0xF8)==0xF0)
{
bytes++; // 1111.xxxx: >=4 byte character
}
if (((uchar)c&0xFC)==0xF8)
{
bytes++; // 1111.1xxx: >=5 byte character
}
if (((uchar)c&0xFE)==0xFC)
{ {
bytes++; // 1111.xxxx: 4 byte character bytes++; // 1111.1xxx: 6 byte character
} }
} }
else if (c=='&') // skip over character entities else if (c=='&') // skip over character entities
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment