filter.c
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008 #include <errno.h>
00009 #include <stdbool.h>
00010 #include <stdlib.h>
00011 #include <string.h>
00012
00013 #ifndef WITHOUT_ICONV_FILTER
00014 #include <iconv.h>
00015 #endif
00016
00017 #include <parserutils/charset/mibenum.h>
00018 #include <parserutils/charset/codec.h>
00019
00020 #include "input/filter.h"
00021 #include "utils/utils.h"
00022
/**
 * Input filter state.
 *
 * Exactly one of two back ends is compiled in: an iconv based one (the
 * default) or a pair of charset codecs joined by a pivot buffer (when
 * WITHOUT_ICONV_FILTER is defined).
 */
struct parserutils_filter {
#ifndef WITHOUT_ICONV_FILTER
	/** iconv conversion descriptor; (iconv_t) -1 while none is open */
	iconv_t cd;
	/** MIB enum of the internal encoding (the conversion target) */
	uint16_t int_enc;
#else
	/** Codec used to decode incoming data into the pivot buffer */
	parserutils_charset_codec *read_codec;
	/** Codec used to encode pivot data into the output buffer */
	parserutils_charset_codec *write_codec;

	/** Intermediate buffer sitting between read_codec and write_codec */
	uint32_t pivot_buf[64];

	/** True when pivot data from a previous call is still unwritten */
	bool leftover;
	/** Start of the unwritten pivot data (only meaningful if leftover) */
	uint8_t *pivot_left;
	/** Number of bytes of unwritten pivot data */
	size_t pivot_len;
#endif

	struct {
		/** MIB enum of the encoding of the data being read */
		uint16_t encoding;
	} settings;
};
00043
00044 static parserutils_error filter_set_defaults(parserutils_filter *input);
00045 static parserutils_error filter_set_encoding(parserutils_filter *input,
00046 const char *enc);
00047
00058 parserutils_error parserutils__filter_create(const char *int_enc,
00059 parserutils_filter **filter)
00060 {
00061 parserutils_filter *f;
00062 parserutils_error error;
00063
00064 if (int_enc == NULL || filter == NULL)
00065 return PARSERUTILS_BADPARM;
00066
00067 f = malloc(sizeof(parserutils_filter));
00068 if (f == NULL)
00069 return PARSERUTILS_NOMEM;
00070
00071 #ifndef WITHOUT_ICONV_FILTER
00072 f->cd = (iconv_t) -1;
00073 f->int_enc = parserutils_charset_mibenum_from_name(
00074 int_enc, strlen(int_enc));
00075 if (f->int_enc == 0) {
00076 free(f);
00077 return PARSERUTILS_BADENCODING;
00078 }
00079 #else
00080 f->leftover = false;
00081 f->pivot_left = NULL;
00082 f->pivot_len = 0;
00083 #endif
00084
00085 error = filter_set_defaults(f);
00086 if (error != PARSERUTILS_OK) {
00087 free(f);
00088 return error;
00089 }
00090
00091 #ifdef WITHOUT_ICONV_FILTER
00092 error = parserutils_charset_codec_create(int_enc, &f->write_codec);
00093 if (error != PARSERUTILS_OK) {
00094 if (f->read_codec != NULL) {
00095 parserutils_charset_codec_destroy(f->read_codec);
00096 f->read_codec = NULL;
00097 }
00098 free(f);
00099 return error;
00100 }
00101 #endif
00102
00103 *filter = f;
00104
00105 return PARSERUTILS_OK;
00106 }
00107
00114 parserutils_error parserutils__filter_destroy(parserutils_filter *input)
00115 {
00116 if (input == NULL)
00117 return PARSERUTILS_BADPARM;
00118
00119 #ifndef WITHOUT_ICONV_FILTER
00120 if (input->cd != (iconv_t) -1) {
00121 iconv_close(input->cd);
00122 input->cd = (iconv_t) -1;
00123 }
00124 #else
00125 if (input->read_codec != NULL) {
00126 parserutils_charset_codec_destroy(input->read_codec);
00127 input->read_codec = NULL;
00128 }
00129
00130 if (input->write_codec != NULL) {
00131 parserutils_charset_codec_destroy(input->write_codec);
00132 input->write_codec = NULL;
00133 }
00134 #endif
00135
00136 free(input);
00137
00138 return PARSERUTILS_OK;
00139 }
00140
00149 parserutils_error parserutils__filter_setopt(parserutils_filter *input,
00150 parserutils_filter_opttype type,
00151 parserutils_filter_optparams *params)
00152 {
00153 parserutils_error error = PARSERUTILS_OK;
00154
00155 if (input == NULL || params == NULL)
00156 return PARSERUTILS_BADPARM;
00157
00158 switch (type) {
00159 case PARSERUTILS_FILTER_SET_ENCODING:
00160 error = filter_set_encoding(input, params->encoding.name);
00161 break;
00162 }
00163
00164 return error;
00165 }
00166
00179 parserutils_error parserutils__filter_process_chunk(parserutils_filter *input,
00180 const uint8_t **data, size_t *len,
00181 uint8_t **output, size_t *outlen)
00182 {
00183 if (input == NULL || data == NULL || *data == NULL || len == NULL ||
00184 output == NULL || *output == NULL || outlen == NULL)
00185 return PARSERUTILS_BADPARM;
00186
00187 #ifndef WITHOUT_ICONV_FILTER
00188 if (iconv(input->cd, (void *) data, len,
00189 (char **) output, outlen) == (size_t) -1) {
00190 switch (errno) {
00191 case E2BIG:
00192 return PARSERUTILS_NOMEM;
00193 case EILSEQ:
00194 if (*outlen < 3)
00195 return PARSERUTILS_NOMEM;
00196
00197 (*output)[0] = 0xef;
00198 (*output)[1] = 0xbf;
00199 (*output)[2] = 0xbd;
00200
00201 *output += 3;
00202 *outlen -= 3;
00203
00204 (*data)++;
00205 (*len)--;
00206
00207 while (*len > 0) {
00208 size_t ret;
00209
00210 ret = iconv(input->cd, (void *) data, len,
00211 (char **) output, outlen);
00212 if (ret != (size_t) -1 || errno != EILSEQ)
00213 break;
00214
00215 if (*outlen < 3)
00216 return PARSERUTILS_NOMEM;
00217
00218 (*output)[0] = 0xef;
00219 (*output)[1] = 0xbf;
00220 (*output)[2] = 0xbd;
00221
00222 *output += 3;
00223 *outlen -= 3;
00224
00225 (*data)++;
00226 (*len)--;
00227 }
00228
00229 return errno == E2BIG ? PARSERUTILS_NOMEM
00230 : PARSERUTILS_OK;
00231 }
00232 }
00233
00234 return PARSERUTILS_OK;
00235 #else
00236 if (input->leftover) {
00237 parserutils_error write_error;
00238
00239
00240
00241
00242 write_error = parserutils_charset_codec_encode(
00243 input->write_codec,
00244 (const uint8_t **) &input->pivot_left,
00245 &input->pivot_len,
00246 output, outlen);
00247
00248 if (write_error != PARSERUTILS_OK)
00249 return write_error;
00250
00251
00252
00253 input->pivot_left = NULL;
00254 input->pivot_len = 0;
00255 input->leftover = false;
00256 }
00257
00258 while (*len > 0) {
00259 parserutils_error read_error, write_error;
00260 size_t pivot_len = sizeof(input->pivot_buf);
00261 uint8_t *pivot = (uint8_t *) input->pivot_buf;
00262
00263 read_error = parserutils_charset_codec_decode(input->read_codec,
00264 data, len,
00265 (uint8_t **) &pivot, &pivot_len);
00266
00267 pivot = (uint8_t *) input->pivot_buf;
00268 pivot_len = sizeof(input->pivot_buf) - pivot_len;
00269
00270 if (pivot_len > 0) {
00271 write_error = parserutils_charset_codec_encode(
00272 input->write_codec,
00273 (const uint8_t **) &pivot,
00274 &pivot_len,
00275 output, outlen);
00276
00277 if (write_error != PARSERUTILS_OK) {
00278 input->leftover = true;
00279 input->pivot_left = pivot;
00280 input->pivot_len = pivot_len;
00281
00282 return write_error;
00283 }
00284 }
00285
00286 if (read_error != PARSERUTILS_OK &&
00287 read_error != PARSERUTILS_NOMEM)
00288 return read_error;
00289 }
00290
00291 return PARSERUTILS_OK;
00292 #endif
00293 }
00294
00301 parserutils_error parserutils__filter_reset(parserutils_filter *input)
00302 {
00303 parserutils_error error = PARSERUTILS_OK;
00304
00305 if (input == NULL)
00306 return PARSERUTILS_BADPARM;
00307
00308 #ifndef WITHOUT_ICONV_FILTER
00309 iconv(input->cd, NULL, 0, NULL, 0);
00310 #else
00311
00312 input->pivot_left = NULL;
00313 input->pivot_len = 0;
00314 input->leftover = false;
00315
00316
00317 error = parserutils_charset_codec_reset(input->read_codec);
00318 if (error != PARSERUTILS_OK)
00319 return error;
00320
00321
00322 error = parserutils_charset_codec_reset(input->write_codec);
00323 if (error != PARSERUTILS_OK)
00324 return error;
00325 #endif
00326
00327 return error;
00328 }
00329
00336 parserutils_error filter_set_defaults(parserutils_filter *input)
00337 {
00338 parserutils_error error;
00339
00340 if (input == NULL)
00341 return PARSERUTILS_BADPARM;
00342
00343 #ifdef WITHOUT_ICONV_FILTER
00344 input->read_codec = NULL;
00345 input->write_codec = NULL;
00346 #endif
00347
00348 input->settings.encoding = 0;
00349 error = filter_set_encoding(input, "UTF-8");
00350 if (error != PARSERUTILS_OK)
00351 return error;
00352
00353 return PARSERUTILS_OK;
00354 }
00355
00363 parserutils_error filter_set_encoding(parserutils_filter *input,
00364 const char *enc)
00365 {
00366 parserutils_error error = PARSERUTILS_OK;
00367 uint16_t mibenum;
00368
00369 if (input == NULL || enc == NULL)
00370 return PARSERUTILS_BADPARM;
00371
00372 mibenum = parserutils_charset_mibenum_from_name(enc, strlen(enc));
00373 if (mibenum == 0)
00374 return PARSERUTILS_BADENCODING;
00375
00376
00377 if (input->settings.encoding == mibenum)
00378 return PARSERUTILS_OK;
00379
00380 #ifndef WITHOUT_ICONV_FILTER
00381 if (input->cd != (iconv_t) -1) {
00382 iconv_close(input->cd);
00383 input->cd = (iconv_t) -1;
00384 }
00385
00386 input->cd = iconv_open(
00387 parserutils_charset_mibenum_to_name(input->int_enc),
00388 parserutils_charset_mibenum_to_name(mibenum));
00389 if (input->cd == (iconv_t) -1) {
00390 return (errno == EINVAL) ? PARSERUTILS_BADENCODING
00391 : PARSERUTILS_NOMEM;
00392 }
00393 #else
00394 if (input->read_codec != NULL) {
00395 parserutils_charset_codec_destroy(input->read_codec);
00396 input->read_codec = NULL;
00397 }
00398
00399 error = parserutils_charset_codec_create(enc, &input->read_codec);
00400 if (error != PARSERUTILS_OK)
00401 return error;
00402 #endif
00403
00404 input->settings.encoding = mibenum;
00405
00406 return error;
00407
00408 }