vrshoot
diff libs/drawtext/utf8.c @ 0:b2f14e535253
initial commit
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sat, 01 Feb 2014 19:58:19 +0200 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libs/drawtext/utf8.c Sat Feb 01 19:58:19 2014 +0200 1.3 @@ -0,0 +1,154 @@ 1.4 +/* 1.5 +libdrawtext - a simple library for fast text rendering in OpenGL 1.6 +Copyright (C) 2011 John Tsiombikas <nuclear@member.fsf.org> 1.7 + 1.8 +This program is free software: you can redistribute it and/or modify 1.9 +it under the terms of the GNU Lesser General Public License as published by 1.10 +the Free Software Foundation, either version 3 of the License, or 1.11 +(at your option) any later version. 1.12 + 1.13 +This program is distributed in the hope that it will be useful, 1.14 +but WITHOUT ANY WARRANTY; without even the implied warranty of 1.15 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1.16 +GNU Lesser General Public License for more details. 1.17 + 1.18 +You should have received a copy of the GNU Lesser General Public License 1.19 +along with this program. If not, see <http://www.gnu.org/licenses/>. 1.20 +*/ 1.21 +#include "drawtext.h" 1.22 + 1.23 +#define U8_IS_FIRST(x) (((((x) >> 7) & 1) == 0) || ((((x) >> 6) & 3) == 3)) 1.24 + 1.25 +static const char first_mask[] = { 1.26 + 0, 1.27 + 0x7f, /* single byte, 7 bits valid */ 1.28 + 0x1f, /* two-bytes, 5 bits valid */ 1.29 + 0xf, /* three-bytes, 4 bits valid */ 1.30 + 0x7 /* four-bytes, 3 bits valid */ 1.31 +}; 1.32 +static const char first_shift[] = { 0, 7, 5, 4, 3 }; /* see above */ 1.33 + 1.34 +#define CONT_PREFIX 0x80 1.35 +#define CONT_MASK 0x3f 1.36 +#define CONT_SHIFT 6 1.37 + 1.38 +/* last charcodes for 1, 2, 3 or 4-byte utf8 chars */ 1.39 +static const int utf8_lastcode[] = { 0x7f, 0x7ff, 0xfff, 0x1fffff }; 1.40 + 1.41 +#define prefix_mask(x) (~first_mask[x]) 1.42 +#define prefix(x) ((prefix_mask(x) << 1) & 0xff) 1.43 + 1.44 + 1.45 +char *dtx_utf8_next_char(char *str) 1.46 +{ 1.47 + return str + dtx_utf8_nbytes(str); 1.48 +} 1.49 + 1.50 +int dtx_utf8_char_code(const char *str) 1.51 +{ 1.52 + int i, nbytes, shift, code = 0; 1.53 + int mask; 1.54 + 1.55 + if(!U8_IS_FIRST(*str)) { 1.56 + return -1; 1.57 + } 1.58 + 1.59 + nbytes = dtx_utf8_nbytes(str); 1.60 + mask = first_mask[nbytes]; 1.61 + shift = 0; 1.62 + 1.63 + for(i=0; i<nbytes; i++) { 1.64 + if(!*str) { 1.65 + break; 1.66 + } 1.67 + 1.68 + code = (code << shift) | (*str++ & mask); 1.69 + mask = 0x3f; 1.70 + shift = 6; 1.71 + } 1.72 + return code; 1.73 +} 1.74 + 1.75 +int dtx_utf8_nbytes(const char *str) 1.76 +{ 1.77 + int i, numset = 0; 1.78 + int c = *str; 1.79 + 1.80 + if(!U8_IS_FIRST(c)) { 1.81 + for(i=0; !U8_IS_FIRST(str[i]); i++); 1.82 + return i; 1.83 + } 1.84 + 1.85 + /* count the leading 1s */ 1.86 + for(i=0; i<4; i++) { 1.87 + if(((c >> (7 - i)) & 1) == 0) { 1.88 + break; 1.89 + } 1.90 + numset++; 1.91 + } 1.92 + 1.93 + if(!numset) { 1.94 + return 1; 1.95 + } 1.96 + return numset; 1.97 +} 1.98 + 1.99 +int dtx_utf8_char_count(const char *str) 1.100 +{ 1.101 + int n = 0; 1.102 + 1.103 + while(*str) { 1.104 + n++; 1.105 + str = dtx_utf8_next_char((char*)str); 1.106 + } 1.107 + return n; 1.108 +} 1.109 + 1.110 +size_t dtx_utf8_from_char_code(int code, char *buf) 1.111 +{ 1.112 + size_t nbytes = 0; 1.113 + int i; 1.114 + 1.115 + for(i=0; i<4; i++) { 1.116 + if(code <= utf8_lastcode[i]) { 1.117 + nbytes = i + 1; 1.118 + break; 1.119 + } 1.120 + } 1.121 + 1.122 + if(!nbytes && buf) { 1.123 + for(i=0; i<(int)nbytes; i++) { 1.124 + int idx = nbytes - i - 1; 1.125 + int mask, shift, prefix; 1.126 + 1.127 + if(idx > 0) { 1.128 + mask = CONT_MASK; 1.129 + shift = CONT_SHIFT; 1.130 + prefix = CONT_PREFIX; 1.131 + } else { 1.132 + mask = first_mask[nbytes]; 1.133 + shift = first_shift[nbytes]; 1.134 + prefix = prefix(nbytes); 1.135 + } 1.136 + 1.137 + buf[idx] = (code & mask) | (prefix & ~mask); 1.138 + code >>= shift; 1.139 + } 1.140 + } 1.141 + return nbytes; 1.142 +} 1.143 + 1.144 +size_t dtx_utf8_from_string(const wchar_t *str, char *buf) 1.145 +{ 1.146 + size_t nbytes = 0; 1.147 + char *ptr = buf; 1.148 + 1.149 + while(*str) { 1.150 + int cbytes = dtx_utf8_from_char_code(*str++, ptr); 1.151 + if(ptr) { 1.152 + ptr += cbytes; 1.153 + } 1.154 + nbytes += cbytes; 1.155 + } 1.156 + return nbytes; 1.157 +}