libdrawtext
view src/utf8.c @ 55:59e5858de836
fixed a bug in utf-8 decoding
author | John Tsiombikas <nuclear@mutantstargoat.com> |
---|---|
date | Thu, 15 Sep 2011 23:32:39 +0300 |
parents | bfe431dd1d80 |
children | 17fed026b24b |
line source
1 #include "drawtext.h"
/* true if x is the first byte of a utf-8 sequence: either a plain 7-bit
 * byte (top bit clear) or the lead byte of a multi-byte sequence (top two
 * bits set). Continuation bytes (10xxxxxx) yield false.
 * note: x is evaluated twice.
 */
#define U8_IS_FIRST(x)		(((((x) >> 7) & 1) == 0) || ((((x) >> 6) & 3) == 3))

/* payload bits carried by the first byte, indexed by sequence length */
static const char first_mask[] = {
	0,
	0x7f,	/* single byte, 7 bits valid */
	0x1f,	/* two-bytes, 5 bits valid */
	0xf,	/* three-bytes, 4 bits valid */
	0x7		/* four-bytes, 3 bits valid */
};
static const char first_shift[] = { 0, 7, 5, 4, 3 };	/* see above */

/* continuation bytes look like 10xxxxxx: fixed prefix plus 6 payload bits */
#define CONT_PREFIX	0x80
#define CONT_MASK	0x3f
#define CONT_SHIFT	6

/* last charcodes for 1, 2, 3 or 4-byte utf8 chars
 * (7, 5+6 = 11, 4+6+6 = 16 and 3+6+6+6 = 21 payload bits respectively)
 */
static const int utf8_lastcode[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff };

/* prefix(x) is the marker bit pattern of the first byte of an x-byte
 * sequence: 0x00, 0xc0, 0xe0, 0xf0 for x = 1..4. The shift is done on an
 * unsigned value because ~first_mask[x] is a negative int, and left-shifting
 * a negative value is undefined.
 */
#define prefix_mask(x)	(~first_mask[x])
#define prefix(x)	(((unsigned int)prefix_mask(x) << 1) & 0xff)
/* Returns a pointer to the character following the one at str.
 *
 * The distance to advance is the length reported by dtx_utf8_nbytes, but the
 * pointer never moves past the string terminator: if the string ends in the
 * middle of a multi-byte sequence (or str already points at the terminator),
 * the returned pointer points at the terminating zero.
 */
char *dtx_utf8_next_char(char *str)
{
	int i, nbytes = dtx_utf8_nbytes(str);

	/* stop early on a truncated sequence instead of skipping the terminator */
	for(i=0; i<nbytes && str[i]; i++);
	return str + i;
}
30 int dtx_utf8_char_code(const char *str)
31 {
32 int i, nbytes, shift, code = 0;
33 int mask;
35 if(!U8_IS_FIRST(*str)) {
36 return -1;
37 }
39 nbytes = dtx_utf8_nbytes(str);
40 mask = first_mask[nbytes];
41 shift = 0;
43 for(i=0; i<nbytes; i++) {
44 if(!*str) {
45 break;
46 }
48 code = (code << shift) | (*str++ & mask);
49 mask = 0x3f;
50 shift = 6;
51 }
53 printf("code: %x\n", code);
54 return code;
55 }
/* Returns the length in bytes of the sequence starting at str.
 *
 * When str points at the first byte of a sequence, the length is taken from
 * the number of leading 1 bits in that byte (at most 4 are counted); a byte
 * with no leading 1 bits is a single-byte character.
 * When str points at a continuation byte, the result is the number of bytes
 * up to the next first byte instead.
 */
int dtx_utf8_nbytes(const char *str)
{
	int lead = *str;
	int ones = 0;

	if(!U8_IS_FIRST(lead)) {
		/* in the middle of a sequence: count the bytes left to skip */
		int skip = 0;
		while(!U8_IS_FIRST(str[skip])) {
			skip++;
		}
		return skip;
	}

	/* count the leading 1s, starting from the top bit */
	while(ones < 4 && ((lead >> (7 - ones)) & 1) != 0) {
		ones++;
	}
	return ones ? ones : 1;
}
/* Returns the number of characters in the zero-terminated utf-8 string str,
 * found by stepping through it with dtx_utf8_next_char until the terminator.
 */
int dtx_utf8_char_count(const char *str)
{
	int count;

	for(count = 0; *str; count++) {
		str = dtx_utf8_next_char((char*)str);
	}
	return count;
}
92 size_t dtx_utf8_from_char_code(int code, char *buf)
93 {
94 size_t nbytes = 0;
95 int i;
97 for(i=0; i<4; i++) {
98 if(code <= utf8_lastcode[i]) {
99 nbytes = i + 1;
100 break;
101 }
102 }
104 if(!nbytes && buf) {
105 for(i=0; i<nbytes; i++) {
106 int idx = nbytes - i - 1;
107 int mask, shift, prefix;
109 if(idx > 0) {
110 mask = CONT_MASK;
111 shift = CONT_SHIFT;
112 prefix = CONT_PREFIX;
113 } else {
114 mask = first_mask[nbytes];
115 shift = first_shift[nbytes];
116 prefix = prefix(nbytes);
117 }
119 buf[idx] = (code & mask) | (prefix & ~mask);
120 code >>= shift;
121 }
122 }
123 return nbytes;
124 }
/* Converts the zero-terminated wide string str to utf-8.
 *
 * Each wide character is passed to dtx_utf8_from_char_code in turn. When buf
 * is not NULL the encoded bytes are stored there back to back; no terminator
 * is appended. With a NULL buf only the length is computed.
 *
 * Returns the sum of the byte counts reported for the individual characters.
 */
size_t dtx_utf8_from_string(const wchar_t *str, char *buf)
{
	size_t total = 0;
	char *out = buf;
	const wchar_t *wc;

	for(wc = str; *wc; wc++) {
		int len = dtx_utf8_from_char_code(*wc, out);

		total += len;
		if(out) {
			out += len;
		}
	}
	return total;
}