00001
00002
00003
00004
00005
00006
00007
00012 #include <stdbool.h>
00013 #include <stdlib.h>
00014 #include <string.h>
00015
00016 #include <parserutils/charset/utf8.h>
00017 #include "charset/encodings/utf8impl.h"
00018
/**
 * Lookup table indexed by a byte value (0x00 - 0xFF).
 *
 * As the name indicates, each entry is the number of continuation bytes
 * associated with that byte when it is treated as the first byte of a
 * UTF-8 sequence:
 *
 *   0x00 - 0xBF  ->  0
 *   0xC0 - 0xDF  ->  1
 *   0xE0 - 0xEF  ->  2
 *   0xF0 - 0xF7  ->  3
 *   0xF8 - 0xFB  ->  4
 *   0xFC - 0xFF  ->  5
 *
 * The table performs no validation: bytes in 0x80 - 0xBF map to 0 just
 * like the ASCII range, and the 4- and 5-continuation entries are kept
 * even though such sequences are longer than modern UTF-8 permits.
 *
 * NOTE(review): the table has external linkage; it is presumably declared
 * in charset/encodings/utf8impl.h for use by the UTF8_* macros -- confirm
 * before changing its name, type or linkage.
 */
const uint8_t numContinuations[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x0F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x1F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x20 - 0x2F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x30 - 0x3F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x40 - 0x4F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x50 - 0x5F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x60 - 0x6F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x70 - 0x7F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x80 - 0x8F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x90 - 0x9F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xA0 - 0xAF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xB0 - 0xBF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0xC0 - 0xCF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0xD0 - 0xDF */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	/* 0xE0 - 0xEF */
	3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,	/* 0xF0 - 0xFF */
};
00038
00051 parserutils_error parserutils_charset_utf8_to_ucs4(const uint8_t *s, size_t len,
00052 uint32_t *ucs4, size_t *clen)
00053 {
00054 parserutils_error error;
00055
00056 UTF8_TO_UCS4(s, len, ucs4, clen, error);
00057
00058 return error;
00059 }
00060
00072 parserutils_error parserutils_charset_utf8_from_ucs4(uint32_t ucs4,
00073 uint8_t **s, size_t *len)
00074 {
00075 parserutils_error error;
00076
00077 UTF8_FROM_UCS4(ucs4, s, len, error);
00078
00079 return error;
00080 }
00081
00090 parserutils_error parserutils_charset_utf8_length(const uint8_t *s, size_t max,
00091 size_t *len)
00092 {
00093 parserutils_error error;
00094
00095 UTF8_LENGTH(s, max, len, error);
00096
00097 return error;
00098 }
00099
00107 parserutils_error parserutils_charset_utf8_char_byte_length(const uint8_t *s,
00108 size_t *len)
00109 {
00110 parserutils_error error;
00111
00112 UTF8_CHAR_BYTE_LENGTH(s, len, error);
00113
00114 return error;
00115 }
00116
00126 parserutils_error parserutils_charset_utf8_prev(const uint8_t *s, uint32_t off,
00127 uint32_t *prevoff)
00128 {
00129 parserutils_error error;
00130
00131 UTF8_PREV(s, off, prevoff, error);
00132
00133 return error;
00134 }
00135
00146 parserutils_error parserutils_charset_utf8_next(const uint8_t *s, uint32_t len,
00147 uint32_t off, uint32_t *nextoff)
00148 {
00149 parserutils_error error;
00150
00151 UTF8_NEXT(s, len, off, nextoff, error);
00152
00153 return error;
00154 }
00155
00166 parserutils_error parserutils_charset_utf8_next_paranoid(const uint8_t *s,
00167 uint32_t len, uint32_t off, uint32_t *nextoff)
00168 {
00169 parserutils_error error;
00170
00171 UTF8_NEXT_PARANOID(s, len, off, nextoff, error);
00172
00173 return error;
00174 }
00175