Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
K
kicad-source-mirror
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Elphel
kicad-source-mirror
Commits
03a4f5c4
Commit
03a4f5c4
authored
Dec 06, 2013
by
Dick Hollenbeck
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Move "code costly" functions in experimental class UTF8 to be not "inlined", preferring compactness.
parent
4374e252
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
84 additions
and
44 deletions
+84
-44
UTF8.cpp
tools/UTF8.cpp
+84
-44
No files found.
tools/UTF8.cpp
View file @
03a4f5c4
...
@@ -2,68 +2,69 @@
...
@@ -2,68 +2,69 @@
#include <stdio.h>
#include <stdio.h>
#include <string>
#include <string>
#include <wx/string.h>
#include <wx/string.h>
#include <
stdin
t.h>
#include <
asser
t.h>
/**
/**
* Class UTF8
* Class UTF8
* is an 8 bit std::string assuredly encoded in UTF8 that supplies special
* is an 8 bit std::string that is assuredly encoded in UTF8, and supplies special
* conversion support to and from wxString, and has iteration over
* conversion support to and from wxString, and has iteration over unicode characters.
* UTF8 code points.
*
* @author Dick Hollenbeck
*/
*/
class
UTF8
:
public
std
::
string
class
UTF8
:
public
std
::
string
{
{
public
:
public
:
UTF8
(
const
wxString
&
o
)
:
UTF8
(
const
wxString
&
o
);
std
::
string
(
(
const
char
*
)
o
.
utf8_str
()
)
{
// @todo: should not be inline.
}
/// This is the only constructor for which you could end up with
/// non-UTF8 encoding, but that would be your fault.
UTF8
(
const
char
*
txt
)
:
UTF8
(
const
char
*
txt
)
:
std
::
string
(
txt
)
std
::
string
(
txt
)
{
{
// ok inline
}
}
explicit
UTF8
(
const
std
::
string
&
o
)
:
explicit
UTF8
(
const
std
::
string
&
o
)
:
std
::
string
(
o
)
std
::
string
(
o
)
{
{
// ok inline
}
}
UTF8
()
:
UTF8
()
:
std
::
string
()
std
::
string
()
{
{
// ok inline
}
}
UTF8
&
operator
=
(
const
wxString
&
o
)
UTF8
&
operator
=
(
const
wxString
&
o
);
{
// @todo: should not be inline.
std
::
string
::
operator
=
(
(
const
char
*
)
o
.
utf8_str
()
);
return
*
this
;
}
UTF8
&
operator
=
(
const
std
::
string
&
o
)
UTF8
&
operator
=
(
const
std
::
string
&
o
)
{
{
std
::
string
::
operator
=
(
o
);
std
::
string
::
operator
=
(
o
);
return
*
this
;
return
*
this
;
}
}
operator
wxString
()
const
operator
wxString
()
const
;
/// This one is not in std::string, and one wonders why... might be a solid
/// enough reason to remove it still.
operator
char
*
()
const
{
{
// @todo: should not be inline.
return
(
char
*
)
c_str
();
return
wxString
(
c_str
(),
wxConvUTF8
);
}
}
static
int
uni_forward
(
unsigned
char
*
it
,
uint32_t
*
result
)
/**
* Function uni_forward
* advances over a UTF8 encoded multibyte character, capturing the unicode
* character as it goes, and returning the number of bytes consumed.
*
* @param aSequence is the UTF8 byte sequence.
* @param aResult is where to put the unicode character.
*/
static
int
uni_forward
(
unsigned
char
*
aSequence
,
unsigned
*
aResult
)
{
{
// @todo: have this read UTF8 characters into result, not bytes.
// @todo: have this read UTF8 characters into result, not bytes.
// What's here now is scaffolding, reading single byte characters only.
// What's here now is scaffolding, reading single byte characters only.
*
result
=
*
it
;
*
aResult
=
*
aSequence
;
return
1
;
return
1
;
}
}
...
@@ -71,37 +72,40 @@ public:
...
@@ -71,37 +72,40 @@ public:
* class uni_iter
* class uni_iter
* is a non-mutable iterator that walks through code points in the UTF8 encoded
* is a non-mutable iterator that walks through code points in the UTF8 encoded
* string. The normal ++(), ++(int), ->(), and *() operators are all supported and
* string. The normal ++(), ++(int), ->(), and *() operators are all supported and
* they return a u
int32_t
holding the unicode character appropriate for respective
* they return a u
nsigned
holding the unicode character appropriate for respective
* operation.
* operation.
*/
*/
class
uni_iter
class
uni_iter
{
{
friend
class
UTF8
;
unsigned
char
*
it
;
unsigned
char
*
it
;
public
:
uni_iter
(
const
char
*
start
)
:
uni_iter
(
const
char
*
start
)
:
it
(
(
unsigned
char
*
)
start
)
it
(
(
unsigned
char
*
)
start
)
{
{
assert
(
sizeof
(
unsigned
)
>=
4
);
}
}
public
:
/// pre-increment and return unicode at new position
/// pre-increment and return unicode at new position
u
int32_t
operator
++
()
u
nsigned
operator
++
()
{
{
u
int32_t
result
;
u
nsigned
result
;
// advance, and toss the result
// advance, and toss the result
it
+=
uni_forward
(
it
,
&
result
);
it
+=
uni_forward
(
it
,
&
result
);
// get the next result, but do not advance:
// get the next result, but do not advance:
uni_forward
(
it
,
&
result
);
uni_forward
(
it
,
&
result
);
return
result
;
return
result
;
}
}
/// post-increment and return unicode at initial position
/// post-increment and return unicode at initial position
u
int32_t
operator
++
(
int
)
u
nsigned
operator
++
(
int
)
{
{
u
int32_t
result
;
u
nsigned
result
;
// grab the result and advance.
// grab the result and advance.
it
+=
uni_forward
(
it
,
&
result
);
it
+=
uni_forward
(
it
,
&
result
);
...
@@ -109,9 +113,9 @@ public:
...
@@ -109,9 +113,9 @@ public:
}
}
/// return unicode at current position
/// return unicode at current position
u
int32_t
operator
->
()
const
u
nsigned
operator
->
()
const
{
{
u
int32_t
result
;
u
nsigned
result
;
// grab the result, do not advance
// grab the result, do not advance
uni_forward
(
it
,
&
result
);
uni_forward
(
it
,
&
result
);
...
@@ -119,9 +123,9 @@ public:
...
@@ -119,9 +123,9 @@ public:
}
}
/// return unicode at current position
/// return unicode at current position
u
int32_t
operator
*
()
const
u
nsigned
operator
*
()
const
{
{
u
int32_t
result
;
u
nsigned
result
;
// grab the result, do not advance
// grab the result, do not advance
uni_forward
(
it
,
&
result
);
uni_forward
(
it
,
&
result
);
...
@@ -136,11 +140,19 @@ public:
...
@@ -136,11 +140,19 @@ public:
bool
operator
>=
(
const
uni_iter
&
other
)
const
{
return
it
>=
other
.
it
;
}
bool
operator
>=
(
const
uni_iter
&
other
)
const
{
return
it
>=
other
.
it
;
}
};
};
/**
* Function ubegin
* returns a @a uni_iter initialized to the start of this UTF8 byte sequence.
*/
uni_iter
ubegin
()
const
uni_iter
ubegin
()
const
{
{
return
uni_iter
(
data
()
);
return
uni_iter
(
data
()
);
}
}
/**
* Function uend
* returns a @a uni_iter initialized to the end of this UTF8 byte sequence.
*/
uni_iter
uend
()
const
uni_iter
uend
()
const
{
{
return
uni_iter
(
data
()
+
size
()
);
return
uni_iter
(
data
()
+
size
()
);
...
@@ -148,9 +160,11 @@ public:
...
@@ -148,9 +160,11 @@ public:
};
};
wxString
a
FunctionTaking_wxString
(
const
wxString
&
wx
)
wxString
wx
FunctionTaking_wxString
(
const
wxString
&
wx
)
{
{
printf
(
"%s: '%s'
\n
"
,
__func__
,
UTF8
(
wx
).
c_str
()
);
printf
(
"%s:'%s'
\n
"
,
__func__
,
(
char
*
)
UTF8
(
wx
)
);
printf
(
"%s:'%s'
\n
"
,
__func__
,
(
const
char
*
)
UTF8
(
wx
)
);
printf
(
"%s:'%s'
\n
"
,
__func__
,
UTF8
(
wx
).
c_str
()
);
return
wx
;
return
wx
;
}
}
...
@@ -158,9 +172,11 @@ wxString aFunctionTaking_wxString( const wxString& wx )
...
@@ -158,9 +172,11 @@ wxString aFunctionTaking_wxString( const wxString& wx )
int
main
()
int
main
()
{
{
UTF8
u1
=
"output"
;
std
::
string
str
=
"input"
;
std
::
string
str
=
"input"
;
wxString
wx
=
wxT
(
"input"
);
UTF8
u1
=
"initial"
;
wxString
wx
=
wxT
(
"input2"
);
printf
(
"u1:'%s'
\n
"
,
u1
.
c_str
()
);
u1
=
str
;
u1
=
str
;
...
@@ -170,25 +186,49 @@ int main()
...
@@ -170,25 +186,49 @@ int main()
u2
+=
'X'
;
u2
+=
'X'
;
printf
(
"u
tf
2:'%s'
\n
"
,
u2
.
c_str
()
);
printf
(
"u2:'%s'
\n
"
,
u2
.
c_str
()
);
// key accomplishments here:
// key accomplishments here:
// 1) passing a UTF8 to a function which normally takes a wxString.
// 1) passing a UTF8 to a function which normally takes a wxString.
// 2) return a wxString back into a UTF8.
// 2) return a wxString back into a UTF8.
UTF8
result
=
a
FunctionTaking_wxString
(
u2
);
UTF8
result
=
wx
FunctionTaking_wxString
(
u2
);
printf
(
"result:'%s'
\n
"
,
result
.
c_str
()
);
printf
(
"result:'%s'
\n
"
,
result
.
c_str
()
);
// test the unicode iterator:
// test the unicode iterator:
for
(
UTF8
::
uni_iter
it
=
u2
.
ubegin
();
it
!=
u2
.
uend
();
)
for
(
UTF8
::
uni_iter
it
=
u2
.
ubegin
();
it
!=
u2
.
uend
();
)
{
{
// test post-increment:
printf
(
" _%c_"
,
it
++
);
printf
(
" _%c_"
,
it
++
);
// after UTF
7::uni_forward() is implemented, it++
%c is no longer useable.
// after UTF
8::uni_forward() is implemented,
%c is no longer useable.
// printf( " _%02x_", it++ );
// printf( " _%02x_", it++ );
}
}
printf
(
"
\n
"
);
printf
(
"
\n
"
);
return
0
;
return
0
;
}
}
// These are to go into a library *.cpp file. They are not inlined, so that the
// code space needed to create the intermediate objects and to reference
// wxConvUTF8 is spent only once rather than at every call site.
/**
 * Constructor UTF8( const wxString& )
 * initializes the std::string base from the UTF8 form of a wxString.
 *
 * The result of o.utf8_str() is converted to a const char* and handed to the
 * std::string( const char* ) constructor, which copies the bytes up to the
 * first NUL terminator.
 *
 * @param o is the wxString whose text is to be held as UTF8 bytes.
 */
UTF8::UTF8( const wxString& o ) :
    std::string( static_cast<const char*>( o.utf8_str() ) )
{
    // Nothing else to do: all the work happens in the base class initializer.
}
/**
 * Operator wxString
 * converts this UTF8 byte sequence into a wxString.
 *
 * The NUL terminated bytes from c_str() are passed to the wxString
 * constructor together with wxConvUTF8 as the converter.
 *
 * @return wxString - built from this object's bytes using wxConvUTF8.
 */
UTF8::operator wxString() const
{
    const char* utf8Bytes = c_str();

    return wxString( utf8Bytes, wxConvUTF8 );
}
/**
 * Operator =
 * replaces the content of this UTF8 with the UTF8 form of a wxString.
 *
 * The result of o.utf8_str() is converted to a const char* and copied into
 * the std::string base, up to the first NUL terminator.
 *
 * @param o is the wxString to copy text from.
 * @return UTF8& - *this, so that assignments can be chained.
 */
UTF8& UTF8::operator=( const wxString& o )
{
    // assign( const char* ) has the same effect as std::string::operator=( const char* ).
    // The conversion result is consumed within this one full expression.
    assign( static_cast<const char*>( o.utf8_str() ) );

    return *this;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment