inputstream.h

Go to the documentation of this file.
00001 /*
00002  * This file is part of LibParserUtils.
00003  * Licensed under the MIT License,
00004  *                http://www.opensource.org/licenses/mit-license.php
00005  * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
00006  */
00007 
00008 #ifndef parserutils_input_inputstream_h_
00009 #define parserutils_input_inputstream_h_
00010 
00011 #ifdef __cplusplus
00012 extern "C"
00013 {
00014 #endif
00015 
00016 #include <stdbool.h>
00017 #ifndef NDEBUG
00018 #include <stdio.h>
00019 #endif
00020 #include <stdlib.h>
00021 #include <inttypes.h>
00022 
00023 #include <parserutils/errors.h>
00024 #include <parserutils/functypes.h>
00025 #include <parserutils/types.h>
00026 #include <parserutils/charset/utf8.h>
00027 #include <parserutils/utils/buffer.h>
00028 
00032 typedef parserutils_error (*parserutils_charset_detect_func)(
00033                 const uint8_t *data, size_t len, 
00034                 uint16_t *mibenum, uint32_t *source);
00035 
00039 typedef struct parserutils_inputstream 
00040 {
00041         parserutils_buffer *utf8;       
00043         uint32_t cursor;                
00045         bool had_eof;                   
00046 } parserutils_inputstream;
00047 
00048 /* Create an input stream */
00049 parserutils_error parserutils_inputstream_create(const char *enc,
00050                 uint32_t encsrc, parserutils_charset_detect_func csdetect,
00051                 parserutils_inputstream **stream);
00052 /* Destroy an input stream */
00053 parserutils_error parserutils_inputstream_destroy(
00054                 parserutils_inputstream *stream);
00055 
00056 /* Append data to an input stream */
00057 parserutils_error parserutils_inputstream_append(
00058                 parserutils_inputstream *stream,
00059                 const uint8_t *data, size_t len);
00060 /* Insert data into stream at current location */
00061 parserutils_error parserutils_inputstream_insert(
00062                 parserutils_inputstream *stream,
00063                 const uint8_t *data, size_t len);
00064 
00065 /* Slow form of css_inputstream_peek. */
00066 parserutils_error parserutils_inputstream_peek_slow(
00067                 parserutils_inputstream *stream, 
00068                 size_t offset, const uint8_t **ptr, size_t *length);
00069 
00091 static inline parserutils_error parserutils_inputstream_peek(
00092                 parserutils_inputstream *stream, size_t offset, 
00093                 const uint8_t **ptr, size_t *length)
00094 {
00095         parserutils_error error = PARSERUTILS_OK;
00096         const parserutils_buffer *utf8;
00097         const uint8_t *utf8_data;
00098         size_t len, off, utf8_len;
00099 
00100         if (stream == NULL || ptr == NULL || length == NULL)
00101                 return PARSERUTILS_BADPARM;
00102 
00103 #ifndef NDEBUG
00104 #ifdef VERBOSE_INPUTSTREAM
00105         fprintf(stdout, "Peek: len: %zu cur: %u off: %zu\n",
00106                         stream->utf8->length, stream->cursor, offset);
00107 #endif
00108 #ifdef RANDOMISE_INPUTSTREAM
00109         parserutils_buffer_randomise(stream->utf8);
00110 #endif
00111 #endif
00112 
00113         utf8 = stream->utf8;
00114         utf8_data = utf8->data;
00115         utf8_len = utf8->length;
00116         off = stream->cursor + offset;
00117 
00118 #define IS_ASCII(x) (((x) & 0x80) == 0)
00119 
00120         if (off < utf8_len) {
00121                 if (IS_ASCII(utf8_data[off])) {
00122                         /* Early exit for ASCII case */
00123                         (*length) = 1;
00124                         (*ptr) = (utf8_data + off);
00125                         return PARSERUTILS_OK;
00126                 } else {
00127                         error = parserutils_charset_utf8_char_byte_length(
00128                                 utf8_data + off, &len);
00129 
00130                         if (error == PARSERUTILS_OK) {
00131                                 (*length) = len;
00132                                 (*ptr) = (utf8_data + off);
00133                                 return PARSERUTILS_OK;
00134                         } else if (error != PARSERUTILS_NEEDDATA) {
00135                                 return error;
00136                         }
00137                 }
00138         }
00139 
00140 #undef IS_ASCII
00141 
00142         return parserutils_inputstream_peek_slow(stream, offset, ptr, length);
00143 }
00144 
00151 static inline void parserutils_inputstream_advance(
00152                 parserutils_inputstream *stream, size_t bytes)
00153 {
00154         if (stream == NULL)
00155                 return;
00156 
00157 #if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM)
00158         fprintf(stdout, "Advance: len: %zu cur: %u bytes: %zu\n",
00159                         stream->utf8->length, stream->cursor, bytes);
00160 #endif
00161 
00162         if (bytes > stream->utf8->length - stream->cursor)
00163                 bytes = stream->utf8->length - stream->cursor;
00164 
00165         if (stream->cursor == stream->utf8->length)
00166                 return;
00167 
00168         stream->cursor += bytes;
00169 }
00170 
00171 /* Read the document charset */
00172 const char *parserutils_inputstream_read_charset(
00173                 parserutils_inputstream *stream, uint32_t *source);
00174 /* Change the document charset */
00175 parserutils_error parserutils_inputstream_change_charset(
00176                 parserutils_inputstream *stream, 
00177                 const char *enc, uint32_t source);
00178 
00179 #ifdef __cplusplus
00180 }
00181 #endif
00182 
00183 #endif
00184 

Generated on Wed Jul 29 11:59:20 2015 for Libparserutils by  doxygen 1.5.6