00001
00002
00003
00004
00005
00006
00007
00008 #include <assert.h>
00009 #include <stdlib.h>
00010 #include <string.h>
00011
00012 #include <parserutils/charset/mibenum.h>
00013
00014 #include "charset/codecs/codec_impl.h"
00015 #include "utils/endian.h"
00016 #include "utils/utils.h"
00017
00021 typedef struct charset_ascii_codec {
00022 parserutils_charset_codec base;
00024 #define READ_BUFSIZE (8)
00025 uint32_t read_buf[READ_BUFSIZE];
00028 size_t read_len;
00030 #define WRITE_BUFSIZE (8)
00031 uint32_t write_buf[WRITE_BUFSIZE];
00034 size_t write_len;
00036 } charset_ascii_codec;
00037
00038 static bool charset_ascii_codec_handles_charset(const char *charset);
00039 static parserutils_error charset_ascii_codec_create(
00040 const char *charset, parserutils_charset_codec **codec);
00041 static parserutils_error charset_ascii_codec_destroy(
00042 parserutils_charset_codec *codec);
00043 static parserutils_error charset_ascii_codec_encode(
00044 parserutils_charset_codec *codec,
00045 const uint8_t **source, size_t *sourcelen,
00046 uint8_t **dest, size_t *destlen);
00047 static parserutils_error charset_ascii_codec_decode(
00048 parserutils_charset_codec *codec,
00049 const uint8_t **source, size_t *sourcelen,
00050 uint8_t **dest, size_t *destlen);
00051 static parserutils_error charset_ascii_codec_reset(
00052 parserutils_charset_codec *codec);
00053 static inline parserutils_error charset_ascii_codec_read_char(
00054 charset_ascii_codec *c,
00055 const uint8_t **source, size_t *sourcelen,
00056 uint8_t **dest, size_t *destlen);
00057 static inline parserutils_error charset_ascii_codec_output_decoded_char(
00058 charset_ascii_codec *c,
00059 uint32_t ucs4, uint8_t **dest, size_t *destlen);
00060 static inline parserutils_error charset_ascii_from_ucs4(charset_ascii_codec *c,
00061 uint32_t ucs4, uint8_t **s, size_t *len);
00062 static inline parserutils_error charset_ascii_to_ucs4(charset_ascii_codec *c,
00063 const uint8_t *s, size_t len, uint32_t *ucs4);
00064
/**
 * Determine whether this codec handles a specific charset
 *
 * \param charset  NUL-terminated name of the charset to test
 * \return true if the name resolves to the same MIB enum value as
 *         "US-ASCII", false otherwise
 */
bool charset_ascii_codec_handles_charset(const char *charset)
{
	/* MIB enum of US-ASCII. Zero means "not resolved yet", so the
	 * lookup is retried on every call until it yields a non-zero value. */
	static uint16_t ascii;
	const uint16_t wanted = parserutils_charset_mibenum_from_name(
			charset, strlen(charset));

	if (ascii == 0) {
		ascii = parserutils_charset_mibenum_from_name(
				"US-ASCII", SLEN("US-ASCII"));
	}

	/* An unresolved (zero) US-ASCII value never counts as a match */
	return ascii != 0 && wanted == ascii;
}
00087
00097 parserutils_error charset_ascii_codec_create(const char *charset,
00098 parserutils_charset_codec **codec)
00099 {
00100 charset_ascii_codec *c;
00101
00102 UNUSED(charset);
00103
00104 c = malloc(sizeof(charset_ascii_codec));
00105 if (c == NULL)
00106 return PARSERUTILS_NOMEM;
00107
00108 c->read_buf[0] = 0;
00109 c->read_len = 0;
00110
00111 c->write_buf[0] = 0;
00112 c->write_len = 0;
00113
00114
00115 c->base.handler.destroy = charset_ascii_codec_destroy;
00116 c->base.handler.encode = charset_ascii_codec_encode;
00117 c->base.handler.decode = charset_ascii_codec_decode;
00118 c->base.handler.reset = charset_ascii_codec_reset;
00119
00120 *codec = (parserutils_charset_codec *) c;
00121
00122 return PARSERUTILS_OK;
00123 }
00124
00131 parserutils_error charset_ascii_codec_destroy (parserutils_charset_codec *codec)
00132 {
00133 UNUSED(codec);
00134
00135 return PARSERUTILS_OK;
00136 }
00137
00165 parserutils_error charset_ascii_codec_encode(parserutils_charset_codec *codec,
00166 const uint8_t **source, size_t *sourcelen,
00167 uint8_t **dest, size_t *destlen)
00168 {
00169 charset_ascii_codec *c = (charset_ascii_codec *) codec;
00170 uint32_t ucs4;
00171 uint32_t *towrite;
00172 size_t towritelen;
00173 parserutils_error error;
00174
00175
00176 if (c->write_len > 0) {
00177 uint32_t *pwrite = c->write_buf;
00178
00179 while (c->write_len > 0) {
00180 error = charset_ascii_from_ucs4(c, pwrite[0],
00181 dest, destlen);
00182 if (error != PARSERUTILS_OK) {
00183 uint32_t len;
00184 assert(error == PARSERUTILS_NOMEM);
00185
00186 for (len = 0; len < c->write_len; len++) {
00187 c->write_buf[len] = pwrite[len];
00188 }
00189
00190 return error;
00191 }
00192
00193 pwrite++;
00194 c->write_len--;
00195 }
00196 }
00197
00198
00199 while (*sourcelen > 0) {
00200 ucs4 = endian_big_to_host(*((uint32_t *) (void *) *source));
00201 towrite = &ucs4;
00202 towritelen = 1;
00203
00204
00205 while (towritelen > 0) {
00206 error = charset_ascii_from_ucs4(c, towrite[0], dest,
00207 destlen);
00208 if (error != PARSERUTILS_OK) {
00209 uint32_t len;
00210 if (error != PARSERUTILS_NOMEM) {
00211 return error;
00212 }
00213
00214
00215 assert(towritelen < WRITE_BUFSIZE);
00216
00217 c->write_len = towritelen;
00218
00219
00220
00221 for (len = 0; len < towritelen; len++)
00222 c->write_buf[len] = towrite[len];
00223
00224
00225
00226 *source += 4;
00227 *sourcelen -= 4;
00228
00229 return PARSERUTILS_NOMEM;
00230 }
00231
00232 towrite++;
00233 towritelen--;
00234 }
00235
00236 *source += 4;
00237 *sourcelen -= 4;
00238 }
00239
00240 return PARSERUTILS_OK;
00241 }
00242
00284 parserutils_error charset_ascii_codec_decode(parserutils_charset_codec *codec,
00285 const uint8_t **source, size_t *sourcelen,
00286 uint8_t **dest, size_t *destlen)
00287 {
00288 charset_ascii_codec *c = (charset_ascii_codec *) codec;
00289 parserutils_error error;
00290
00291 if (c->read_len > 0) {
00292
00293 uint32_t *pread = c->read_buf;
00294
00295 while (c->read_len > 0 && *destlen >= c->read_len * 4) {
00296 *((uint32_t *) (void *) *dest) =
00297 endian_host_to_big(pread[0]);
00298
00299 *dest += 4;
00300 *destlen -= 4;
00301
00302 pread++;
00303 c->read_len--;
00304 }
00305
00306 if (*destlen < c->read_len * 4) {
00307
00308 size_t i;
00309
00310
00311 for (i = 0; i < c->read_len; i++)
00312 c->read_buf[i] = pread[i];
00313
00314 return PARSERUTILS_NOMEM;
00315 }
00316 }
00317
00318
00319 while (*sourcelen > 0) {
00320 error = charset_ascii_codec_read_char(c,
00321 source, sourcelen, dest, destlen);
00322 if (error != PARSERUTILS_OK) {
00323 return error;
00324 }
00325 }
00326
00327 return PARSERUTILS_OK;
00328 }
00329
00336 parserutils_error charset_ascii_codec_reset(parserutils_charset_codec *codec)
00337 {
00338 charset_ascii_codec *c = (charset_ascii_codec *) codec;
00339
00340 c->read_buf[0] = 0;
00341 c->read_len = 0;
00342
00343 c->write_buf[0] = 0;
00344 c->write_len = 0;
00345
00346 return PARSERUTILS_OK;
00347 }
00348
00349
00378 parserutils_error charset_ascii_codec_read_char(charset_ascii_codec *c,
00379 const uint8_t **source, size_t *sourcelen,
00380 uint8_t **dest, size_t *destlen)
00381 {
00382 uint32_t ucs4;
00383 parserutils_error error;
00384
00385
00386 error = charset_ascii_to_ucs4(c, *source, *sourcelen, &ucs4);
00387 if (error == PARSERUTILS_OK) {
00388
00389 error = charset_ascii_codec_output_decoded_char(c,
00390 ucs4, dest, destlen);
00391 if (error == PARSERUTILS_OK || error == PARSERUTILS_NOMEM) {
00392
00393 *source += 1;
00394 *sourcelen -= 1;
00395 }
00396
00397 return error;
00398 } else if (error == PARSERUTILS_NEEDDATA) {
00399
00400 return error;
00401 } else if (error == PARSERUTILS_INVALID) {
00402
00403
00404
00405 if (c->base.errormode ==
00406 PARSERUTILS_CHARSET_CODEC_ERROR_STRICT) {
00407 return PARSERUTILS_INVALID;
00408 }
00409
00410
00411 error = charset_ascii_codec_output_decoded_char(c,
00412 0xFFFD, dest, destlen);
00413 if (error == PARSERUTILS_OK || error == PARSERUTILS_NOMEM) {
00414
00415 *source += 1;
00416 *sourcelen -= 1;
00417 }
00418
00419 return error;
00420 }
00421
00422 return PARSERUTILS_OK;
00423 }
00424
00435 parserutils_error charset_ascii_codec_output_decoded_char(
00436 charset_ascii_codec *c,
00437 uint32_t ucs4, uint8_t **dest, size_t *destlen)
00438 {
00439 if (*destlen < 4) {
00440
00441 c->read_len = 1;
00442 c->read_buf[0] = ucs4;
00443
00444 return PARSERUTILS_NOMEM;
00445 }
00446
00447 *((uint32_t *) (void *) *dest) = endian_host_to_big(ucs4);
00448 *dest += 4;
00449 *destlen -= 4;
00450
00451 return PARSERUTILS_OK;
00452 }
00453
00470 parserutils_error charset_ascii_from_ucs4(charset_ascii_codec *c,
00471 uint32_t ucs4, uint8_t **s, size_t *len)
00472 {
00473 uint8_t out = 0;
00474
00475 if (*len < 1)
00476 return PARSERUTILS_NOMEM;
00477
00478 if (ucs4 < 0x80) {
00479
00480 out = ucs4;
00481 } else {
00482 if (c->base.errormode == PARSERUTILS_CHARSET_CODEC_ERROR_STRICT)
00483 return PARSERUTILS_INVALID;
00484 else
00485 out = '?';
00486 }
00487
00488 *(*s) = out;
00489 (*s)++;
00490 (*len)--;
00491
00492 return PARSERUTILS_OK;
00493 }
00494
00506 parserutils_error charset_ascii_to_ucs4(charset_ascii_codec *c,
00507 const uint8_t *s, size_t len, uint32_t *ucs4)
00508 {
00509 uint32_t out;
00510
00511 UNUSED(c);
00512
00513 if (len < 1)
00514 return PARSERUTILS_NEEDDATA;
00515
00516 if (*s < 0x80) {
00517 out = *s;
00518 } else {
00519 return PARSERUTILS_INVALID;
00520 }
00521
00522 *ucs4 = out;
00523
00524 return PARSERUTILS_OK;
00525 }
00526
00527 const parserutils_charset_handler charset_ascii_codec_handler = {
00528 charset_ascii_codec_handles_charset,
00529 charset_ascii_codec_create
00530 };
00531