inputstream.h File Reference

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <parserutils/errors.h>
#include <parserutils/functypes.h>
#include <parserutils/types.h>
#include <parserutils/charset/utf8.h>
#include <parserutils/utils/buffer.h>

Go to the source code of this file.

Data Structures

struct  parserutils_inputstream
 Input stream object. More...

Defines

#define IS_ASCII(x)   (((x) & 0x80) == 0)

Typedefs

typedef parserutils_error(* parserutils_charset_detect_func )(const uint8_t *data, size_t len, uint16_t *mibenum, uint32_t *source)
 Type of charset detection function.

Functions

parserutils_error parserutils_inputstream_create (const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, parserutils_inputstream **stream)
 Create an input stream.
parserutils_error parserutils_inputstream_destroy (parserutils_inputstream *stream)
 Destroy an input stream.
parserutils_error parserutils_inputstream_append (parserutils_inputstream *stream, const uint8_t *data, size_t len)
 Append data to an input stream.
parserutils_error parserutils_inputstream_insert (parserutils_inputstream *stream, const uint8_t *data, size_t len)
 Insert data into stream at current location.
parserutils_error parserutils_inputstream_peek_slow (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
 Look at the character in the stream that starts at offset bytes from the cursor (slow version).
static parserutils_error parserutils_inputstream_peek (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
 Look at the character in the stream that starts at offset bytes from the cursor.
static void parserutils_inputstream_advance (parserutils_inputstream *stream, size_t bytes)
 Advance the stream's current position.
const char * parserutils_inputstream_read_charset (parserutils_inputstream *stream, uint32_t *source)
 Read the source charset of the input stream.
parserutils_error parserutils_inputstream_change_charset (parserutils_inputstream *stream, const char *enc, uint32_t source)
 Change the source charset of the input stream.


Define Documentation

#define IS_ASCII ( x )     (((x) & 0x80) == 0)


Typedef Documentation

typedef parserutils_error(* parserutils_charset_detect_func)(const uint8_t *data, size_t len, uint16_t *mibenum, uint32_t *source)

Type of charset detection function.

Definition at line 32 of file inputstream.h.


Function Documentation

static void parserutils_inputstream_advance ( parserutils_inputstream *  stream,
size_t  bytes 
) [inline, static]

Advance the stream's current position.

Parameters:
stream The stream whose position to advance
bytes The number of bytes to advance

Definition at line 151 of file inputstream.h.

References parserutils_inputstream::cursor, parserutils_buffer::length, and parserutils_inputstream::utf8.

parserutils_error parserutils_inputstream_append ( parserutils_inputstream *  stream,
const uint8_t *  data,
size_t  len 
)

Append data to an input stream.

Parameters:
stream Input stream to append data to
data Data to append (in document charset), or NULL to flag EOF
len Length, in bytes, of data
Returns:
PARSERUTILS_OK on success, appropriate error otherwise

Definition at line 169 of file inputstream.c.

References parserutils_inputstream::had_eof, PARSERUTILS_BADPARM, parserutils_buffer_append(), PARSERUTILS_OK, parserutils_inputstream_private::public, and parserutils_inputstream_private::raw.

parserutils_error parserutils_inputstream_change_charset ( parserutils_inputstream *  stream,
const char *  enc,
uint32_t  source 
)

Change the source charset of the input stream.

Parameters:
stream Input stream to modify
enc Charset name
source Charset source identifier
Returns:
PARSERUTILS_OK on success, PARSERUTILS_BADPARM on invalid parameters, PARSERUTILS_INVALID if called after data has been read from stream, PARSERUTILS_BADENCODING if the encoding is unsupported, PARSERUTILS_NOMEM on memory exhaustion.

Definition at line 321 of file inputstream.c.

References parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_INVALID, and PARSERUTILS_OK.

parserutils_error parserutils_inputstream_create ( const char *  enc,
uint32_t  encsrc,
parserutils_charset_detect_func  csdetect,
parserutils_inputstream **  stream 
)

Create an input stream.

Parameters:
enc Document charset, or NULL to autodetect
encsrc Value for encoding source, if specified, or 0
csdetect Charset detection function, or NULL
stream Pointer to location to receive stream instance
Returns:
PARSERUTILS_OK on success, PARSERUTILS_BADPARM on bad parameters, PARSERUTILS_NOMEM on memory exhaustion, PARSERUTILS_BADENCODING on unsupported encoding
Note: an encsrc value of 0 is defined as being the lowest priority encoding source (i.e. the default fallback encoding). Beyond this, no further interpretation is placed upon the encoding source.

Definition at line 59 of file inputstream.c.

References parserutils_inputstream_private::csdetect, parserutils_inputstream::cursor, parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream::had_eof, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_create(), parserutils__filter_destroy(), parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_buffer_create(), parserutils_buffer_destroy(), parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_NOMEM, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.

parserutils_error parserutils_inputstream_destroy ( parserutils_inputstream *  stream  ) 

Destroy an input stream.

Parameters:
stream Input stream to destroy
Returns:
PARSERUTILS_OK on success, appropriate error otherwise

Definition at line 144 of file inputstream.c.

References parserutils_inputstream_private::input, parserutils__filter_destroy(), PARSERUTILS_BADPARM, parserutils_buffer_destroy(), PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.

parserutils_error parserutils_inputstream_insert ( parserutils_inputstream *  stream,
const uint8_t *  data,
size_t  len 
)

Insert data into stream at current location.

Parameters:
stream Input stream to insert into
data Data to insert (UTF-8 encoded)
len Length, in bytes, of data
Returns:
PARSERUTILS_OK on success, appropriate error otherwise

Definition at line 195 of file inputstream.c.

References parserutils_inputstream::cursor, PARSERUTILS_BADPARM, parserutils_buffer_insert(), parserutils_inputstream_private::public, and parserutils_inputstream::utf8.

static parserutils_error parserutils_inputstream_peek ( parserutils_inputstream *  stream,
size_t  offset,
const uint8_t **  ptr,
size_t *  length 
) [inline, static]

Look at the character in the stream that starts at offset bytes from the cursor.

Parameters:
stream Stream to look in
offset Byte offset of start of character
ptr Pointer to location to receive pointer to character data
length Pointer to location to receive character length (in bytes)
Returns:
PARSERUTILS_OK on success, _NEEDDATA on reaching the end of available input, _EOF on reaching the end of all input, _BADENCODING if the input cannot be decoded, _NOMEM on memory exhaustion, _BADPARM if bad parameters are passed.
Once the character pointed to by the result of this call has been advanced past (i.e. parserutils_inputstream_advance has caused the stream cursor to pass over the character), then no guarantee is made as to the validity of the data pointed to. Thus, any attempt to dereference the pointer after advancing past the data it points to is a bug.

Definition at line 91 of file inputstream.h.

References parserutils_inputstream::cursor, parserutils_buffer::data, IS_ASCII, parserutils_buffer::length, PARSERUTILS_BADPARM, parserutils_buffer_randomise(), parserutils_charset_utf8_char_byte_length(), parserutils_inputstream_peek_slow(), PARSERUTILS_NEEDDATA, PARSERUTILS_OK, and parserutils_inputstream::utf8.

parserutils_error parserutils_inputstream_peek_slow ( parserutils_inputstream *  stream,
size_t  offset,
const uint8_t **  ptr,
size_t *  length 
)

Look at the character in the stream that starts at offset bytes from the cursor (slow version).

Parameters:
stream Stream to look in
offset Byte offset of start of character
ptr Pointer to location to receive pointer to character data
length Pointer to location to receive character length (in bytes)
Returns:
PARSERUTILS_OK on success, _NEEDDATA on reaching the end of available input, _EOF on reaching the end of all input, _BADENCODING if the input cannot be decoded, _NOMEM on memory exhaustion, _BADPARM if bad parameters are passed.
Once the character pointed to by the result of this call has been advanced past (i.e. parserutils_inputstream_advance has caused the stream cursor to pass over the character), then no guarantee is made as to the validity of the data pointed to. Thus, any attempt to dereference the pointer after advancing past the data it points to is a bug.

Definition at line 232 of file inputstream.c.

References parserutils_inputstream::cursor, parserutils_buffer::data, parserutils_inputstream::had_eof, IS_ASCII, len, parserutils_buffer::length, PARSERUTILS_BADPARM, parserutils_charset_utf8_char_byte_length(), PARSERUTILS_EOF, parserutils_inputstream_refill_buffer(), PARSERUTILS_NEEDDATA, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.

Referenced by parserutils_inputstream_peek().

const char* parserutils_inputstream_read_charset ( parserutils_inputstream *  stream,
uint32_t *  source 
)

Read the source charset of the input stream.

Parameters:
stream Input stream to query
source Pointer to location to receive charset source identifier
Returns:
Pointer to charset name (constant; do not free)

Definition at line 292 of file inputstream.c.

References parserutils_inputstream_private::encsrc, parserutils_inputstream_private::mibenum, and parserutils_charset_mibenum_to_name().


Generated on Wed Jul 29 11:59:21 2015 for Libparserutils by  doxygen 1.5.6