inputstream.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008 #ifndef parserutils_input_inputstream_h_
00009 #define parserutils_input_inputstream_h_
00010
00011 #ifdef __cplusplus
00012 extern "C"
00013 {
00014 #endif
00015
00016 #include <stdbool.h>
00017 #ifndef NDEBUG
00018 #include <stdio.h>
00019 #endif
00020 #include <stdlib.h>
00021 #include <inttypes.h>
00022
00023 #include <parserutils/errors.h>
00024 #include <parserutils/functypes.h>
00025 #include <parserutils/types.h>
00026 #include <parserutils/charset/utf8.h>
00027 #include <parserutils/utils/buffer.h>
00028
00032 typedef parserutils_error (*parserutils_charset_detect_func)(
00033 const uint8_t *data, size_t len,
00034 uint16_t *mibenum, uint32_t *source);
00035
00039 typedef struct parserutils_inputstream
00040 {
00041 parserutils_buffer *utf8;
00043 uint32_t cursor;
00045 bool had_eof;
00046 } parserutils_inputstream;
00047
00048
00049 parserutils_error parserutils_inputstream_create(const char *enc,
00050 uint32_t encsrc, parserutils_charset_detect_func csdetect,
00051 parserutils_inputstream **stream);
00052
00053 parserutils_error parserutils_inputstream_destroy(
00054 parserutils_inputstream *stream);
00055
00056
00057 parserutils_error parserutils_inputstream_append(
00058 parserutils_inputstream *stream,
00059 const uint8_t *data, size_t len);
00060
00061 parserutils_error parserutils_inputstream_insert(
00062 parserutils_inputstream *stream,
00063 const uint8_t *data, size_t len);
00064
00065
00066 parserutils_error parserutils_inputstream_peek_slow(
00067 parserutils_inputstream *stream,
00068 size_t offset, const uint8_t **ptr, size_t *length);
00069
00091 static inline parserutils_error parserutils_inputstream_peek(
00092 parserutils_inputstream *stream, size_t offset,
00093 const uint8_t **ptr, size_t *length)
00094 {
00095 parserutils_error error = PARSERUTILS_OK;
00096 const parserutils_buffer *utf8;
00097 const uint8_t *utf8_data;
00098 size_t len, off, utf8_len;
00099
00100 if (stream == NULL || ptr == NULL || length == NULL)
00101 return PARSERUTILS_BADPARM;
00102
00103 #ifndef NDEBUG
00104 #ifdef VERBOSE_INPUTSTREAM
00105 fprintf(stdout, "Peek: len: %zu cur: %u off: %zu\n",
00106 stream->utf8->length, stream->cursor, offset);
00107 #endif
00108 #ifdef RANDOMISE_INPUTSTREAM
00109 parserutils_buffer_randomise(stream->utf8);
00110 #endif
00111 #endif
00112
00113 utf8 = stream->utf8;
00114 utf8_data = utf8->data;
00115 utf8_len = utf8->length;
00116 off = stream->cursor + offset;
00117
00118 #define IS_ASCII(x) (((x) & 0x80) == 0)
00119
00120 if (off < utf8_len) {
00121 if (IS_ASCII(utf8_data[off])) {
00122
00123 (*length) = 1;
00124 (*ptr) = (utf8_data + off);
00125 return PARSERUTILS_OK;
00126 } else {
00127 error = parserutils_charset_utf8_char_byte_length(
00128 utf8_data + off, &len);
00129
00130 if (error == PARSERUTILS_OK) {
00131 (*length) = len;
00132 (*ptr) = (utf8_data + off);
00133 return PARSERUTILS_OK;
00134 } else if (error != PARSERUTILS_NEEDDATA) {
00135 return error;
00136 }
00137 }
00138 }
00139
00140 #undef IS_ASCII
00141
00142 return parserutils_inputstream_peek_slow(stream, offset, ptr, length);
00143 }
00144
00151 static inline void parserutils_inputstream_advance(
00152 parserutils_inputstream *stream, size_t bytes)
00153 {
00154 if (stream == NULL)
00155 return;
00156
00157 #if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM)
00158 fprintf(stdout, "Advance: len: %zu cur: %u bytes: %zu\n",
00159 stream->utf8->length, stream->cursor, bytes);
00160 #endif
00161
00162 if (bytes > stream->utf8->length - stream->cursor)
00163 bytes = stream->utf8->length - stream->cursor;
00164
00165 if (stream->cursor == stream->utf8->length)
00166 return;
00167
00168 stream->cursor += bytes;
00169 }
00170
00171
00172 const char *parserutils_inputstream_read_charset(
00173 parserutils_inputstream *stream, uint32_t *source);
00174
00175 parserutils_error parserutils_inputstream_change_charset(
00176 parserutils_inputstream *stream,
00177 const char *enc, uint32_t source);
00178
00179 #ifdef __cplusplus
00180 }
00181 #endif
00182
00183 #endif
00184