filter.c

Go to the documentation of this file.
00001 /*
00002  * This file is part of LibParserUtils.
00003  * Licensed under the MIT License,
00004  *                http://www.opensource.org/licenses/mit-license.php
00005  * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
00006  */
00007 
00008 #include <errno.h>
00009 #include <stdbool.h>
00010 #include <stdlib.h>
00011 #include <string.h>
00012 
00013 #ifndef WITHOUT_ICONV_FILTER
00014 #include <iconv.h>
00015 #endif
00016 
00017 #include <parserutils/charset/mibenum.h>
00018 #include <parserutils/charset/codec.h>
00019 
00020 #include "input/filter.h"
00021 #include "utils/utils.h"
00022 
/**
 * Input filter state.
 *
 * Exactly one of two back ends is compiled in: an iconv descriptor, or a
 * pair of charset codecs that convert through an intermediate pivot buffer.
 */
struct parserutils_filter {
#ifndef WITHOUT_ICONV_FILTER
        iconv_t cd;                     /**< iconv conversion descriptor,
                                         *   (iconv_t) -1 when not open */
        uint16_t int_enc;               /**< MIB enum of the internal
                                         *   (output) encoding */
#else
        parserutils_charset_codec *read_codec;  /**< Decodes the input */
        parserutils_charset_codec *write_codec; /**< Encodes the output */

        uint32_t pivot_buf[64];         /**< Intermediate buffer between
                                         *   the read and write codecs */

        bool leftover;                  /**< Pivot data from the previous
                                         *   call is still unwritten */
        uint8_t *pivot_left;            /**< Start of the unwritten pivot
                                         *   data (inside pivot_buf) */
        size_t pivot_len;               /**< Bytes of unwritten pivot data */
#endif

        struct {
                uint16_t encoding;      /**< MIB enum of the input encoding,
                                         *   0 until one has been set */
        } settings;                     /**< Filter settings */
};
00043 
00044 static parserutils_error filter_set_defaults(parserutils_filter *input);
00045 static parserutils_error filter_set_encoding(parserutils_filter *input,
00046                 const char *enc);
00047 
00058 parserutils_error parserutils__filter_create(const char *int_enc,
00059                 parserutils_filter **filter)
00060 {
00061         parserutils_filter *f;
00062         parserutils_error error;
00063 
00064         if (int_enc == NULL || filter == NULL)
00065                 return PARSERUTILS_BADPARM;
00066 
00067         f = malloc(sizeof(parserutils_filter));
00068         if (f == NULL)
00069                 return PARSERUTILS_NOMEM;
00070 
00071 #ifndef WITHOUT_ICONV_FILTER
00072         f->cd = (iconv_t) -1;
00073         f->int_enc = parserutils_charset_mibenum_from_name(
00074                         int_enc, strlen(int_enc));
00075         if (f->int_enc == 0) {
00076                 free(f);
00077                 return PARSERUTILS_BADENCODING;
00078         }
00079 #else
00080         f->leftover = false;
00081         f->pivot_left = NULL;
00082         f->pivot_len = 0;
00083 #endif
00084 
00085         error = filter_set_defaults(f);
00086         if (error != PARSERUTILS_OK) {
00087                 free(f);
00088                 return error;
00089         }
00090 
00091 #ifdef WITHOUT_ICONV_FILTER
00092         error = parserutils_charset_codec_create(int_enc, &f->write_codec);
00093         if (error != PARSERUTILS_OK) {
00094                 if (f->read_codec != NULL) {
00095                         parserutils_charset_codec_destroy(f->read_codec);
00096                         f->read_codec = NULL;
00097                 }
00098                 free(f);
00099                 return error;
00100         }
00101 #endif
00102 
00103         *filter = f;
00104 
00105         return PARSERUTILS_OK;
00106 }
00107 
00114 parserutils_error parserutils__filter_destroy(parserutils_filter *input)
00115 {
00116         if (input == NULL)
00117                 return PARSERUTILS_BADPARM;
00118 
00119 #ifndef WITHOUT_ICONV_FILTER
00120         if (input->cd != (iconv_t) -1) {
00121                 iconv_close(input->cd);
00122                 input->cd = (iconv_t) -1;
00123         }
00124 #else
00125         if (input->read_codec != NULL) {
00126                 parserutils_charset_codec_destroy(input->read_codec);
00127                 input->read_codec = NULL;
00128         }
00129 
00130         if (input->write_codec != NULL) {
00131                 parserutils_charset_codec_destroy(input->write_codec);
00132                 input->write_codec = NULL;
00133         }
00134 #endif
00135 
00136         free(input);
00137 
00138         return PARSERUTILS_OK;
00139 }
00140 
00149 parserutils_error parserutils__filter_setopt(parserutils_filter *input,
00150                 parserutils_filter_opttype type,
00151                 parserutils_filter_optparams *params)
00152 {
00153         parserutils_error error = PARSERUTILS_OK;
00154 
00155         if (input == NULL || params == NULL)
00156                 return PARSERUTILS_BADPARM;
00157 
00158         switch (type) {
00159         case PARSERUTILS_FILTER_SET_ENCODING:
00160                 error = filter_set_encoding(input, params->encoding.name);
00161                 break;
00162         }
00163 
00164         return error;
00165 }
00166 
00179 parserutils_error parserutils__filter_process_chunk(parserutils_filter *input,
00180                 const uint8_t **data, size_t *len,
00181                 uint8_t **output, size_t *outlen)
00182 {
00183         if (input == NULL || data == NULL || *data == NULL || len == NULL ||
00184                         output == NULL || *output == NULL || outlen == NULL)
00185                 return PARSERUTILS_BADPARM;
00186 
00187 #ifndef WITHOUT_ICONV_FILTER
00188         if (iconv(input->cd, (void *) data, len, 
00189                         (char **) output, outlen) == (size_t) -1) {
00190                 switch (errno) {
00191                 case E2BIG:
00192                         return PARSERUTILS_NOMEM;
00193                 case EILSEQ:
00194                         if (*outlen < 3)
00195                                 return PARSERUTILS_NOMEM;
00196 
00197                         (*output)[0] = 0xef;
00198                         (*output)[1] = 0xbf;
00199                         (*output)[2] = 0xbd;
00200 
00201                         *output += 3;
00202                         *outlen -= 3;
00203 
00204                         (*data)++;
00205                         (*len)--;
00206 
00207                         while (*len > 0) {
00208                                 size_t ret;
00209                                 
00210                                 ret = iconv(input->cd, (void *) data, len, 
00211                                                 (char **) output, outlen);
00212                                 if (ret != (size_t) -1 || errno != EILSEQ)
00213                                         break;
00214 
00215                                 if (*outlen < 3)
00216                                         return PARSERUTILS_NOMEM;
00217 
00218                                 (*output)[0] = 0xef;
00219                                 (*output)[1] = 0xbf;
00220                                 (*output)[2] = 0xbd;
00221 
00222                                 *output += 3;
00223                                 *outlen -= 3;
00224 
00225                                 (*data)++;
00226                                 (*len)--;
00227                         }
00228 
00229                         return errno == E2BIG ? PARSERUTILS_NOMEM 
00230                                               : PARSERUTILS_OK;
00231                 }
00232         }
00233 
00234         return PARSERUTILS_OK;
00235 #else
00236         if (input->leftover) {
00237                 parserutils_error write_error;
00238 
00239                 /* Some data left to be written from last call */
00240 
00241                 /* Attempt to flush the remaining data. */
00242                 write_error = parserutils_charset_codec_encode(
00243                                 input->write_codec,
00244                                 (const uint8_t **) &input->pivot_left,
00245                                 &input->pivot_len,
00246                                 output, outlen);
00247 
00248                 if (write_error != PARSERUTILS_OK)
00249                         return write_error;
00250 
00251 
00252                 /* And clear leftover */
00253                 input->pivot_left = NULL;
00254                 input->pivot_len = 0;
00255                 input->leftover = false;
00256         }
00257 
00258         while (*len > 0) {
00259                 parserutils_error read_error, write_error;
00260                 size_t pivot_len = sizeof(input->pivot_buf);
00261                 uint8_t *pivot = (uint8_t *) input->pivot_buf;
00262 
00263                 read_error = parserutils_charset_codec_decode(input->read_codec,
00264                                 data, len,
00265                                 (uint8_t **) &pivot, &pivot_len);
00266 
00267                 pivot = (uint8_t *) input->pivot_buf;
00268                 pivot_len = sizeof(input->pivot_buf) - pivot_len;
00269 
00270                 if (pivot_len > 0) {
00271                         write_error = parserutils_charset_codec_encode(
00272                                         input->write_codec,
00273                                         (const uint8_t **) &pivot,
00274                                         &pivot_len,
00275                                         output, outlen);
00276 
00277                         if (write_error != PARSERUTILS_OK) {
00278                                 input->leftover = true;
00279                                 input->pivot_left = pivot;
00280                                 input->pivot_len = pivot_len;
00281 
00282                                 return write_error;
00283                         }
00284                 }
00285 
00286                 if (read_error != PARSERUTILS_OK && 
00287                                 read_error != PARSERUTILS_NOMEM)
00288                         return read_error;
00289         }
00290 
00291         return PARSERUTILS_OK;
00292 #endif
00293 }
00294 
00301 parserutils_error parserutils__filter_reset(parserutils_filter *input)
00302 {
00303         parserutils_error error = PARSERUTILS_OK;
00304 
00305         if (input == NULL)
00306                 return PARSERUTILS_BADPARM;
00307 
00308 #ifndef WITHOUT_ICONV_FILTER
00309         iconv(input->cd, NULL, 0, NULL, 0);
00310 #else
00311         /* Clear pivot buffer leftovers */
00312         input->pivot_left = NULL;
00313         input->pivot_len = 0;
00314         input->leftover = false;
00315 
00316         /* Reset read codec */
00317         error = parserutils_charset_codec_reset(input->read_codec);
00318         if (error != PARSERUTILS_OK)
00319                 return error;
00320 
00321         /* Reset write codec */
00322         error = parserutils_charset_codec_reset(input->write_codec);
00323         if (error != PARSERUTILS_OK)
00324                 return error;
00325 #endif
00326 
00327         return error;
00328 }
00329 
00336 parserutils_error filter_set_defaults(parserutils_filter *input)
00337 {
00338         parserutils_error error;
00339 
00340         if (input == NULL)
00341                 return PARSERUTILS_BADPARM;
00342 
00343 #ifdef WITHOUT_ICONV_FILTER
00344         input->read_codec = NULL;
00345         input->write_codec = NULL;
00346 #endif
00347 
00348         input->settings.encoding = 0;
00349         error = filter_set_encoding(input, "UTF-8");
00350         if (error != PARSERUTILS_OK)
00351                 return error;
00352 
00353         return PARSERUTILS_OK;
00354 }
00355 
00363 parserutils_error filter_set_encoding(parserutils_filter *input,
00364                 const char *enc)
00365 {
00366         parserutils_error error = PARSERUTILS_OK;
00367         uint16_t mibenum;
00368 
00369         if (input == NULL || enc == NULL)
00370                 return PARSERUTILS_BADPARM;
00371 
00372         mibenum = parserutils_charset_mibenum_from_name(enc, strlen(enc));
00373         if (mibenum == 0)
00374                 return PARSERUTILS_BADENCODING;
00375 
00376         /* Exit early if we're already using this encoding */
00377         if (input->settings.encoding == mibenum)
00378                 return PARSERUTILS_OK;
00379 
00380 #ifndef WITHOUT_ICONV_FILTER
00381         if (input->cd != (iconv_t) -1) {
00382                 iconv_close(input->cd);
00383                 input->cd = (iconv_t) -1;
00384         }
00385 
00386         input->cd = iconv_open(
00387                 parserutils_charset_mibenum_to_name(input->int_enc),
00388                 parserutils_charset_mibenum_to_name(mibenum));
00389         if (input->cd == (iconv_t) -1) {
00390                 return (errno == EINVAL) ? PARSERUTILS_BADENCODING
00391                                          : PARSERUTILS_NOMEM;
00392         }
00393 #else
00394         if (input->read_codec != NULL) {
00395                 parserutils_charset_codec_destroy(input->read_codec);
00396                 input->read_codec = NULL;
00397         }
00398 
00399         error = parserutils_charset_codec_create(enc, &input->read_codec);
00400         if (error != PARSERUTILS_OK)
00401                 return error;
00402 #endif
00403 
00404         input->settings.encoding = mibenum;
00405 
00406         return error;
00407 
00408 }

Generated on Wed Jul 29 11:59:21 2015 for Libparserutils by  doxygen 1.5.6