#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <parserutils/charset/mibenum.h>
#include <parserutils/charset/utf8.h>
#include <parserutils/input/inputstream.h>
#include "input/filter.h"
#include "utils/utils.h"

Go to the source code of this file.
Data Structures | |
| struct | parserutils_inputstream_private |
| Private input stream definition. More... | |
Defines | |
| #define | IS_ASCII(x) (((x) & 0x80) == 0) |
| #define | UTF32_BOM_LEN (4) |
| #define | UTF16_BOM_LEN (2) |
| #define | UTF8_BOM_LEN (3) |
Functions | |
| static parserutils_error | parserutils_inputstream_refill_buffer (parserutils_inputstream_private *stream) |
| Refill the UTF-8 buffer from the raw buffer. | |
| static parserutils_error | parserutils_inputstream_strip_bom (uint16_t *mibenum, parserutils_buffer *buffer) |
| Strip a BOM from a buffer in the given encoding. | |
| parserutils_error | parserutils_inputstream_create (const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, parserutils_inputstream **stream) |
| Create an input stream. | |
| parserutils_error | parserutils_inputstream_destroy (parserutils_inputstream *stream) |
| Destroy an input stream. | |
| parserutils_error | parserutils_inputstream_append (parserutils_inputstream *stream, const uint8_t *data, size_t len) |
| Append data to an input stream. | |
| parserutils_error | parserutils_inputstream_insert (parserutils_inputstream *stream, const uint8_t *data, size_t len) |
| Insert data into stream at current location. | |
| parserutils_error | parserutils_inputstream_peek_slow (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length) |
| Look at the character in the stream that starts at offset bytes from the cursor (slow version). | |
| const char * | parserutils_inputstream_read_charset (parserutils_inputstream *stream, uint32_t *source) |
| Read the source charset of the input stream. | |
| parserutils_error | parserutils_inputstream_change_charset (parserutils_inputstream *stream, const char *enc, uint32_t source) |
| Change the source charset of the input stream. | |
#define IS_ASCII(x) (((x) & 0x80) == 0)
Definition at line 209 of file inputstream.c.
| #define UTF16_BOM_LEN (2) |
Referenced by parserutils_inputstream_strip_bom().
| #define UTF32_BOM_LEN (4) |
Referenced by parserutils_inputstream_strip_bom().
| #define UTF8_BOM_LEN (3) |
Referenced by parserutils_inputstream_strip_bom().
parserutils_error parserutils_inputstream_append(parserutils_inputstream *stream, const uint8_t *data, size_t len)
Append data to an input stream.
| stream | Input stream to append data to | |
| data | Data to append (in document charset), or NULL to flag EOF | |
| len | Length, in bytes, of data |
Definition at line 169 of file inputstream.c.
References parserutils_inputstream::had_eof, PARSERUTILS_BADPARM, parserutils_buffer_append(), PARSERUTILS_OK, parserutils_inputstream_private::public, and parserutils_inputstream_private::raw.
parserutils_error parserutils_inputstream_change_charset(parserutils_inputstream *stream, const char *enc, uint32_t source)
Change the source charset of the input stream.
| stream | Input stream to modify | |
| enc | Charset name | |
| source | Charset source identifier |
Definition at line 321 of file inputstream.c.
References parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_INVALID, and PARSERUTILS_OK.
parserutils_error parserutils_inputstream_create(const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, parserutils_inputstream **stream)
Create an input stream.
| enc | Document charset, or NULL to autodetect | |
| encsrc | Value for encoding source, if specified, or 0 | |
| csdetect | Charset detection function, or NULL | |
| stream | Pointer to location to receive stream instance |
Definition at line 59 of file inputstream.c.
References parserutils_inputstream_private::csdetect, parserutils_inputstream::cursor, parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream::had_eof, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_create(), parserutils__filter_destroy(), parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_buffer_create(), parserutils_buffer_destroy(), parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_NOMEM, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_destroy(parserutils_inputstream *stream)
Destroy an input stream.
| stream | Input stream to destroy |
Definition at line 144 of file inputstream.c.
References parserutils_inputstream_private::input, parserutils__filter_destroy(), PARSERUTILS_BADPARM, parserutils_buffer_destroy(), PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_insert(parserutils_inputstream *stream, const uint8_t *data, size_t len)
Insert data into stream at current location.
| stream | Input stream to insert into | |
| data | Data to insert (UTF-8 encoded) | |
| len | Length, in bytes, of data |
Definition at line 195 of file inputstream.c.
References parserutils_inputstream::cursor, PARSERUTILS_BADPARM, parserutils_buffer_insert(), parserutils_inputstream_private::public, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_peek_slow(parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
Look at the character in the stream that starts at offset bytes from the cursor (slow version).
| stream | Stream to look in | |
| offset | Byte offset of start of character | |
| ptr | Pointer to location to receive pointer to character data | |
| length | Pointer to location to receive character length (in bytes) |
Definition at line 232 of file inputstream.c.
References parserutils_inputstream::cursor, parserutils_buffer::data, parserutils_inputstream::had_eof, IS_ASCII, len, parserutils_buffer::length, PARSERUTILS_BADPARM, parserutils_charset_utf8_char_byte_length(), PARSERUTILS_EOF, parserutils_inputstream_refill_buffer(), PARSERUTILS_NEEDDATA, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.
Referenced by parserutils_inputstream_peek().
const char *parserutils_inputstream_read_charset(parserutils_inputstream *stream, uint32_t *source)
Read the source charset of the input stream.
| stream | Input stream to query | |
| source | Pointer to location to receive charset source identifier |
Definition at line 292 of file inputstream.c.
References parserutils_inputstream_private::encsrc, parserutils_inputstream_private::mibenum, and parserutils_charset_mibenum_to_name().
parserutils_error parserutils_inputstream_refill_buffer(parserutils_inputstream_private *stream) [inline, static]
Refill the UTF-8 buffer from the raw buffer.
| stream | The inputstream to operate on |
Definition at line 365 of file inputstream.c.
References parserutils_buffer::allocated, parserutils_inputstream_private::csdetect, parserutils_inputstream::cursor, parserutils_buffer::data, parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream::had_eof, parserutils_inputstream_private::input, parserutils_buffer::length, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_process_chunk(), parserutils__filter_setopt(), parserutils_buffer_discard(), parserutils_buffer_grow(), parserutils_charset_mibenum_from_name(), parserutils_charset_mibenum_to_name(), PARSERUTILS_FILTER_SET_ENCODING, parserutils_inputstream_strip_bom(), PARSERUTILS_NEEDDATA, PARSERUTILS_NOMEM, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, SLEN, and parserutils_inputstream::utf8.
Referenced by parserutils_inputstream_peek_slow().
parserutils_error parserutils_inputstream_strip_bom(uint16_t *mibenum, parserutils_buffer *buffer) [inline, static]
Strip a BOM from a buffer in the given encoding.
| mibenum | Pointer to the character set of the buffer, updated on exit | |
| buffer | The buffer to process |
Definition at line 496 of file inputstream.c.
References parserutils_buffer::data, parserutils_buffer::length, parserutils_buffer_discard(), parserutils_charset_mibenum_from_name(), PARSERUTILS_OK, SLEN, UTF16_BOM_LEN, UTF32_BOM_LEN, and UTF8_BOM_LEN.
Referenced by parserutils_inputstream_refill_buffer().
1.5.6