Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
K
kicad-source-mirror
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Elphel
kicad-source-mirror
Commits
5df72886
Commit
5df72886
authored
Dec 08, 2013
by
Dick Hollenbeck
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
complete class UTF8.cpp
parent
2f327f06
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
176 additions
and
22 deletions
+176
-22
stroke_font.cpp
common/gal/stroke_font.cpp
+2
-1
UTF8.cpp
tools/UTF8.cpp
+170
-19
make-UTF8.sh
tools/make-UTF8.sh
+4
-2
No files found.
common/gal/stroke_font.cpp
View file @
5df72886
...
...
@@ -249,7 +249,8 @@ void STROKE_FONT::drawSingleLineText( const wxString& aText )
// (textSize.x)
xOffset
=
textSize
.
x
;
glyphSize
.
x
=
-
m_glyphSize
.
x
;
}
else
}
else
{
xOffset
=
0.0
;
}
...
...
tools/UTF8.cpp
View file @
5df72886
...
...
@@ -10,6 +10,15 @@
* is an 8 bit std::string that is assuredly encoded in UTF8, and supplies special
* conversion support to and from wxString, and has iteration over unicode characters.
*
* <p>I've been careful to supply only conversion facilities and not try
* and duplicate wxString() with many member functions. In the end it is
* to be a std::string. There are multiple ways to create text into a std::string
* without the need of member functions. std::ostringstream.
*
* <p>Because this class uses no virtuals, it should be possible to cast any
* std::string into a UTF8 using this kind of cast: (UTF8 &) without construction
* or copying being the effect of the cast.
*
* @author Dick Hollenbeck
*/
class
UTF8
:
public
std
::
string
...
...
@@ -25,6 +34,9 @@ public:
{
}
/// For use with _() function on wx 2.8:
UTF8
(
const
wchar_t
*
txt
);
explicit
UTF8
(
const
std
::
string
&
o
)
:
std
::
string
(
o
)
{
...
...
@@ -54,25 +66,20 @@ public:
/**
* Function uni_forward
* advances over a
UTF8 encoded multibyte character, capturing the unicod
e
* character as it goes, and returning the number of bytes consumed.
* advances over a
single UTF8 encoded multibyte character, capturing th
e
*
unicode
character as it goes, and returning the number of bytes consumed.
*
* @param aSequence is the UTF8 byte sequence.
* @param aResult is where to put the unicode character.
* @param aSequence is the UTF8 byte sequence, must be aligned on start of character.
* @param aResult is where to put the unicode character, and may be NULL if no interest.
* @return int - the count of bytes consumed.
*/
static
int
uni_forward
(
unsigned
char
*
aSequence
,
unsigned
*
aResult
)
{
// @todo: have this read UTF8 characters into result, not bytes.
// What's here now is scaffolding, reading single byte characters only.
*
aResult
=
*
aSequence
;
return
1
;
}
static
int
uni_forward
(
unsigned
char
*
aSequence
,
unsigned
*
aResult
=
NULL
);
/**
* class uni_iter
* is a non-mutable iterator that walks through code points in the UTF8 encoded
* string. The normal ++(), ++(int), ->(), and *() operators are all supported and
* they return a unsigned holding the unicode character appropriate for respective
* they return a
n
unsigned holding the unicode character appropriate for respective
* operation.
*/
class
uni_iter
...
...
@@ -81,10 +88,11 @@ public:
unsigned
char
*
it
;
// private constructor.
uni_iter
(
const
char
*
start
)
:
it
(
(
unsigned
char
*
)
start
)
{
assert
(
sizeof
(
unsigned
)
>=
4
);
// for the human:
assert( sizeof(unsigned) >= 4 );
}
public
:
...
...
@@ -94,10 +102,10 @@ public:
{
unsigned
result
;
// advance
, and toss th
e result
it
+=
uni_forward
(
it
,
&
result
);
// advance
over current, and toss the unicod
e result
it
+=
uni_forward
(
it
);
// get the next result, but do not advance:
// get the next
unicode
result, but do not advance:
uni_forward
(
it
,
&
result
);
return
result
;
}
...
...
@@ -173,15 +181,21 @@ wxString wxFunctionTaking_wxString( const wxString& wx )
int
main
()
{
std
::
string
str
=
"input"
;
UTF8
u0
=
L"wide string"
;
UTF8
u1
=
"initial"
;
wxString
wx
=
wxT
(
"input2"
);
printf
(
"u0:'%s'
\n
"
,
u0
.
c_str
()
);
printf
(
"u1:'%s'
\n
"
,
u1
.
c_str
()
);
u1
=
str
;
wxString
wx2
=
u1
;
// force a std::string into a UTF8, then into a wxString, then copy construct:
wxString
wx3
=
(
UTF8
&
)
u1
;
UTF8
u2
=
wx2
;
u2
+=
'X'
;
...
...
@@ -196,7 +210,7 @@ int main()
printf
(
"result:'%s'
\n
"
,
result
.
c_str
()
);
// test the unicode iterator:
for
(
UTF8
::
uni_iter
it
=
u2
.
ubegin
();
it
!=
u2
.
uend
();
)
for
(
UTF8
::
uni_iter
it
=
u2
.
ubegin
();
it
<
u2
.
uend
();
)
{
// test post-increment:
printf
(
" _%c_"
,
it
++
);
...
...
@@ -211,8 +225,13 @@ int main()
}
// These to go into a library *.cpp, they are not inlined so that code space
// is saved creating the intermediate objects and referencing wxConvUTF8.
/*
These to go into a library *.cpp, they are not inlined so that significant
code space is saved by encapsulating the creation of intermediate objects
and referencing wxConvUTF8.
*/
UTF8
::
UTF8
(
const
wxString
&
o
)
:
...
...
@@ -232,3 +251,135 @@ UTF8& UTF8::operator=( const wxString& o )
std
::
string
::
operator
=
(
(
const
char
*
)
o
.
utf8_str
()
);
return
*
this
;
}
/*
    Map a UTF8 lead byte to the length in bytes of the sequence it introduces.
    Zero means the byte is not a legal lead byte.  See RFC 3629 for details.

    Only the upper half of the byte range (0x80-0xFF) is stored, so the table
    must be indexed with ( byte - 0x80 ).  Bytes 0x00-0x7F always encode a
    single byte character and are expected to be handled by the caller without
    consulting this table.
*/
static const unsigned char utf8_len[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 80-8F  continuation bytes
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // 90-9F  continuation bytes
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // A0-AF  continuation bytes
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // B0-BF  continuation bytes
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,     // C0-C1 (illegal) + C2-CF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,     // D0-DF
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,     // E0-EF
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0      // F0-F4 + F5-FF (illegal)
};
// If no THROW_IO_ERROR macro is already defined at this point, define it to
// expand to nothing, so that the THROW_IO_ERROR( ... ) statements below still
// compile.  In that configuration those statements have no effect at all and
// execution simply continues with the statement that follows them.
#ifndef THROW_IO_ERROR
#define THROW_IO_ERROR(x) // nothing
#endif
// There is no wxWidgets function that does this, because wchar_t is 16 bits
// on windows and wx wants to encode the output in UTF16 for such.

/**
 * Function uni_forward
 * decodes the single UTF8 encoded character which starts at aSequence.
 *
 * Malformed input (an illegal start byte, a bad continuation byte, an overlong
 * 3 or 4 byte form, or a 4 byte value above 0x10FFFF) is reported through
 * THROW_IO_ERROR().  When that macro expands to nothing, the function falls
 * back to consuming exactly one byte and reporting that raw byte value.  This
 * guarantees that a caller doing "it += uni_forward( it )" always advances,
 * and that a truncated sequence at the end of a nul terminated string never
 * carries the caller past the terminator.
 *
 * @param aSequence is the UTF8 byte sequence, must be aligned on start of character.
 * @param aResult is where to put the unicode character, and may be NULL if no interest.
 * @return int - the count of bytes consumed, never less than 1.
 */
int UTF8::uni_forward( unsigned char* aSequence, unsigned* aResult )
{
    const unsigned char* s = aSequence;

    // Defaults describe the single byte outcome: either plain 7 bit ASCII, or
    // the fallback for malformed input described above.
    unsigned    ch  = *s;
    int         len = 1;

    if( ch >= 0x80 )
    {
        switch( utf8_len[ *s - 0x80 /* top half of table is missing */ ] )
        {
        default:
        case 0:
            THROW_IO_ERROR( "invalid start byte" );
            break;

        case 2:
            if( ( s[1] & 0xc0 ) != 0x80 )
            {
                THROW_IO_ERROR( "invalid continuation byte" );
                break;      // keep the single byte fallback
            }

            ch  = ( ( s[0] & 0x1f ) << 6 ) +
                  ( ( s[1] & 0x3f ) << 0 );
            len = 2;

            assert( ch > 0x007F && ch <= 0x07FF );
            break;

        case 3:
            // The || chain short circuits, so s[2] is only read once s[1] is
            // known to be a continuation byte (and therefore not a nul).
            if( ( s[1] & 0xc0 ) != 0x80 ||
                ( s[2] & 0xc0 ) != 0x80 ||
                ( s[0] == 0xE0 && s[1] < 0xA0 )         // overlong encoding
                // || (s[0] == 0xED && s[1] > 0x9F)
              )
            {
                THROW_IO_ERROR( "invalid continuation byte" );
                break;      // keep the single byte fallback
            }

            ch  = ( ( s[0] & 0x0f ) << 12 ) +
                  ( ( s[1] & 0x3f ) << 6  ) +
                  ( ( s[2] & 0x3f ) << 0  );
            len = 3;

            assert( ch > 0x07FF && ch <= 0xFFFF );
            break;

        case 4:
            if( ( s[1] & 0xc0 ) != 0x80 ||
                ( s[2] & 0xc0 ) != 0x80 ||
                ( s[3] & 0xc0 ) != 0x80 ||
                ( s[0] == 0xF0 && s[1] < 0x90 ) ||      // overlong encoding
                ( s[0] == 0xF4 && s[1] > 0x8F )         // above 0x10FFFF
              )
            {
                THROW_IO_ERROR( "invalid continuation byte" );
                break;      // keep the single byte fallback
            }

            ch  = ( ( s[0] & 0x7  ) << 18 ) +
                  ( ( s[1] & 0x3f ) << 12 ) +
                  ( ( s[2] & 0x3f ) << 6  ) +
                  ( ( s[3] & 0x3f ) << 0  );
            len = 4;

            assert( ch > 0xFFFF && ch <= 0x10ffff );
            break;
        }
    }

    if( aResult )
    {
        *aResult = ch;
    }

    return len;
}
/**
 * Constructor UTF8( const wchar_t* )
 * builds the UTF8 encoded string from a nul terminated wide character string.
 *
 * If the conversion fails the resulting string is empty.
 *
 * @param txt is the nul terminated wide character text to convert, and must
 *  not be NULL since its length is taken with wcslen().
 */
UTF8::UTF8( const wchar_t* txt ) :
    // size initial string safely large enough, then shrink to known size later.
    std::string( wcslen( txt ) * 4, 0 )
{
    /*
        "this" string was sized to hold the worst case UTF8 encoded byte
        sequence, and was initialized with all nul bytes.  Overwrite some of
        those nuls, then resize, shrinking down to actual size.

        Use the wx 2.8 function, not new FromWChar().  It knows about wchar_t
        possibly being 16 bits wide on Windows and holding UTF16 input.
    */

    if( empty() )
        return;         // empty input, there is nothing to convert

    // WC2MB() returns a size_t.  Keep it in a size_t so that its failure value,
    // (size_t) -1, can be recognized instead of being handed on to resize().
    size_t sz = wxConvUTF8.WC2MB( (char*) data(), txt, size() );

    if( sz == (size_t) -1 || sz > size() )
        sz = 0;         // conversion failed, leave an empty string

    resize( sz );
}
tools/make-UTF8.sh
View file @
5df72886
WXCONFIG
=
wx-config
INCLUDE
=
/usr/include/wx-2.8
#WXCONFIG=/opt/wx2.9/bin/wx-config
g++
-
I
$INCLUDE
$(
$WXCONFIG
--cppflags
)
UTF8.cpp
-o
test
$(
$WXCONFIG
--libs
)
g++
-
g
$(
$WXCONFIG
--cppflags
)
UTF8.cpp
-o
test
$(
$WXCONFIG
--libs
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment