libdrawtext
diff src/utf8.c @ 0:bfe431dd1d80
initial commit
author | John Tsiombikas <nuclear@mutantstargoat.com> |
---|---|
date | Thu, 15 Sep 2011 10:47:38 +0300 |
parents | |
children | fe0c54e574ae |
line diff
/*
 * libdrawtext - src/utf8.c
 *
 * Minimal UTF-8 helpers: stepping through a UTF-8 string, decoding a single
 * character to its code point, counting characters, and encoding code points
 * (or a whole wide string) back to UTF-8.
 */
#include <stddef.h>	/* size_t, wchar_t, NULL */

/* The public prototypes live in drawtext.h.  Nothing in this file needs
 * anything else from it, so the include is skipped when the header cannot be
 * found; that allows this file to be compiled on its own (e.g. by a unit-test
 * harness).  Compilers without __has_include include it unconditionally.
 */
#if defined(__has_include)
#  if __has_include("drawtext.h")
#    include "drawtext.h"
#  endif
#else
#  include "drawtext.h"
#endif

#define MAX_SEQ_LEN	4	/* longest sequence handled, in bytes */

#define CONT_PREFIX	0x80	/* continuation bytes look like 10xxxxxx */
#define CONT_MASK	0x3f	/* ... and carry 6 payload bits each */
#define CONT_SHIFT	6

/* payload bits carried by the first byte, indexed by sequence length (1-4) */
static const unsigned char first_mask[] = {
	0,
	0x7f,	/* single byte, 7 bits valid */
	0x1f,	/* two bytes, 5 bits valid */
	0x0f,	/* three bytes, 4 bits valid */
	0x07	/* four bytes, 3 bits valid */
};

/* marker bits of the first byte, indexed by sequence length (1-4) */
static const unsigned char first_prefix[] = {
	0,
	0x00,	/* 0xxxxxxx */
	0xc0,	/* 110xxxxx */
	0xe0,	/* 1110xxxx */
	0xf0	/* 11110xxx */
};

/* last character codes representable with 1, 2, 3 or 4 bytes */
static const int utf8_lastcode[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff };

/* Returns non-zero if c can start a character, i.e. it is anything but a
 * continuation byte (10xxxxxx).  Note that the NUL terminator qualifies.
 */
static int is_first_byte(unsigned char c)
{
	return (c & 0xc0) != CONT_PREFIX;
}

/* Returns the length in bytes of the character starting at str, as announced
 * by the number of leading 1 bits of its first byte (capped at 4).
 *
 * If str points into the middle of a character (at a continuation byte), the
 * number of continuation bytes up to the next first byte (or the terminator)
 * is returned instead, so that str + result is the start of the next
 * character.
 */
int dtx_utf8_nbytes(const char *str)
{
	const unsigned char *s = (const unsigned char*)str;
	int i, numset = 0;

	if(!is_first_byte(*s)) {
		i = 0;
		while(!is_first_byte(s[i])) {
			i++;
		}
		return i;
	}

	/* count the leading 1s */
	for(i=0; i<MAX_SEQ_LEN; i++) {
		if(((*s >> (7 - i)) & 1) == 0) {
			break;
		}
		numset++;
	}

	/* no leading 1s at all means plain single-byte ASCII */
	return numset ? numset : 1;
}

/* Returns a pointer to the character following the one at str.
 *
 * At least one byte is always consumed, but a sequence that is cut short by
 * the end of the string never makes the result step over the terminator.
 */
char *dtx_utf8_next_char(char *str)
{
	int nbytes = dtx_utf8_nbytes(str);
	int i = 1;

	while(i < nbytes && str[i]) {
		i++;
	}
	return str + i;
}

/* Decodes the character starting at str and returns its code point.
 *
 * Returns -1 if str does not point at the first byte of a character.  If the
 * string ends in the middle of the sequence, the bits gathered up to that
 * point are returned.  Continuation bytes are not validated.
 */
int dtx_utf8_char_code(const char *str)
{
	const unsigned char *s = (const unsigned char*)str;
	int i, nbytes, code;

	if(!is_first_byte(*s)) {
		return -1;
	}

	nbytes = dtx_utf8_nbytes(str);
	code = *s & first_mask[nbytes];

	/* every continuation byte contributes 6 more low-order bits */
	for(i=1; i<nbytes; i++) {
		if(!s[i]) {
			break;
		}
		code = (code << CONT_SHIFT) | (s[i] & CONT_MASK);
	}
	return code;
}

/* Returns the number of characters (not bytes) in the NUL-terminated UTF-8
 * string str.
 */
int dtx_utf8_char_count(const char *str)
{
	int n = 0;

	while(*str) {
		n++;
		str = dtx_utf8_next_char((char*)str);
	}
	return n;
}

/* Encodes the character code as UTF-8.
 *
 * If buf is not NULL the encoded bytes are written to it; it must have room
 * for up to 4 bytes, and no terminator is appended.  Passing NULL only
 * computes the length.
 *
 * Returns the number of bytes of the encoding, or 0 if code is negative or
 * too large to fit in 4 bytes (> 0x1fffff), in which case nothing is written.
 */
size_t dtx_utf8_from_char_code(int code, char *buf)
{
	size_t nbytes = 0;
	int i;

	if(code < 0) {
		return 0;
	}

	for(i=0; i<MAX_SEQ_LEN; i++) {
		if(code <= utf8_lastcode[i]) {
			nbytes = (size_t)i + 1;
			break;
		}
	}

	if(nbytes && buf) {
		/* fill in the continuation bytes back to front, 6 bits at a time */
		for(i=(int)nbytes - 1; i>0; i--) {
			buf[i] = (char)(CONT_PREFIX | (code & CONT_MASK));
			code >>= CONT_SHIFT;
		}
		/* whatever is left goes in the first byte, below the length marker */
		buf[0] = (char)(first_prefix[nbytes] | (code & first_mask[nbytes]));
	}
	return nbytes;
}

/* Encodes the NUL-terminated wide string str as UTF-8.
 *
 * If buf is not NULL the encoded bytes are written to it; no terminator is
 * appended.  Passing NULL only computes the required size, which makes it
 * possible to call this twice: once to measure, once to convert.
 *
 * Returns the total number of bytes of the encoding.  Characters that cannot
 * be encoded contribute nothing.
 */
size_t dtx_utf8_from_string(const wchar_t *str, char *buf)
{
	size_t nbytes = 0;
	char *ptr = buf;

	while(*str) {
		size_t cbytes = dtx_utf8_from_char_code((int)*str++, ptr);
		if(ptr) {
			ptr += cbytes;
		}
		nbytes += cbytes;
	}
	return nbytes;
}