#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <parserutils/charset/mibenum.h>
#include "charset/codecs/codec_impl.h"
#include "utils/endian.h"
#include "utils/utils.h"
#include "charset/codecs/8859_tables.h"

Go to the source code of this file.
Data Structures | |
| struct | charset_8859_codec |
| ISO-8859-n charset codec. More... | |
Defines | |
| #define | READ_BUFSIZE (8) |
| #define | WRITE_BUFSIZE (8) |
Functions | |
| static bool | charset_8859_codec_handles_charset (const char *charset) |
| Determine whether this codec handles a specific charset. | |
| static parserutils_error | charset_8859_codec_create (const char *charset, parserutils_charset_codec **codec) |
| Create an ISO-8859-n codec. | |
| static parserutils_error | charset_8859_codec_destroy (parserutils_charset_codec *codec) |
| Destroy an ISO-8859-n codec. | |
| static parserutils_error | charset_8859_codec_encode (parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen) |
| Encode a chunk of UCS-4 (big endian) data into ISO-8859-n. | |
| static parserutils_error | charset_8859_codec_decode (parserutils_charset_codec *codec, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen) |
| Decode a chunk of ISO-8859-n data into UCS-4 (big endian). | |
| static parserutils_error | charset_8859_codec_reset (parserutils_charset_codec *codec) |
| Clear an ISO-8859-n codec's encoding state. | |
| static parserutils_error | charset_8859_codec_read_char (charset_8859_codec *c, const uint8_t **source, size_t *sourcelen, uint8_t **dest, size_t *destlen) |
| Read an ISO-8859-n character and convert it to UCS-4 (big endian). | |
| static parserutils_error | charset_8859_codec_output_decoded_char (charset_8859_codec *c, uint32_t ucs4, uint8_t **dest, size_t *destlen) |
| Output a UCS-4 character (big endian). | |
| static parserutils_error | charset_8859_from_ucs4 (charset_8859_codec *c, uint32_t ucs4, uint8_t **s, size_t *len) |
| Convert a UCS-4 (host endian) character to ISO-8859-n. | |
| static parserutils_error | charset_8859_to_ucs4 (charset_8859_codec *c, const uint8_t *s, size_t len, uint32_t *ucs4) |
| Convert an ISO-8859-n character to UCS-4 (host endian). | |
Variables | |
| struct { | |
| uint16_t mib | |
| const char * name | |
| size_t len | |
| uint32_t * table | |
| } | known_charsets [] |
| const parserutils_charset_handler | charset_8859_codec_handler |
| #define READ_BUFSIZE (8) |
Definition at line 51 of file codec_8859.c.
| #define WRITE_BUFSIZE (8) |
Definition at line 57 of file codec_8859.c.
Referenced by charset_8859_codec_encode(), charset_ascii_codec_encode(), charset_ext8_codec_encode(), charset_utf16_codec_encode(), and charset_utf8_codec_encode().
| parserutils_error charset_8859_codec_create | ( | const char * | charset, | |
| parserutils_charset_codec ** | codec | |||
| ) | [static] |
Create an ISO-8859-n codec.
| charset | The charset to read from / write to | |
| codec | Pointer to location to receive codec |
Definition at line 130 of file codec_8859.c.
References charset_8859_codec::base, charset_8859_codec_decode(), charset_8859_codec_destroy(), charset_8859_codec_encode(), charset_8859_codec_reset(), parserutils_charset_codec::decode, parserutils_charset_codec::destroy, parserutils_charset_codec::encode, parserutils_charset_codec::handler, known_charsets, mib, N_ELEMENTS, parserutils_charset_mibenum_from_name(), PARSERUTILS_NOMEM, PARSERUTILS_OK, charset_8859_codec::read_buf, charset_8859_codec::read_len, parserutils_charset_codec::reset, charset_8859_codec::table, table, charset_8859_codec::write_buf, and charset_8859_codec::write_len.
| parserutils_error charset_8859_codec_decode | ( | parserutils_charset_codec * | codec, | |
| const uint8_t ** | source, | |||
| size_t * | sourcelen, | |||
| uint8_t ** | dest, | |||
| size_t * | destlen | |||
| ) | [static] |
Decode a chunk of ISO-8859-n data into UCS-4 (big endian).
| codec | The codec to use | |
| source | Pointer to pointer to source data | |
| sourcelen | Pointer to length (in bytes) of source data | |
| dest | Pointer to pointer to output buffer | |
| destlen | Pointer to length (in bytes) of output buffer |
In the case of the result being _INVALID, source will point _at_ the last input character read; nothing will be written or buffered for the failed character. It is up to the client to fix the cause of the failure and retry the decoding process.
Note that, if failure occurs whilst attempting to write any output buffered by the last call, then source and sourcelen will remain unchanged (as nothing more has been read).
If STRICT error handling is configured and an illegal sequence is split over two calls, then _INVALID will be returned from the second call, but source will point mid-way through the invalid sequence (i.e. it will be unmodified over the second call). In addition, the internal incomplete-sequence buffer will be emptied, such that subsequent calls will progress, rather than re-evaluating the same invalid sequence.
sourcelen will be reduced appropriately on exit.
dest will point immediately _after_ the last character written.
destlen will be reduced appropriately on exit.
Call this with a source length of 0 to flush the output buffer.
Definition at line 330 of file codec_8859.c.
References charset_8859_codec_read_char(), endian_host_to_big(), PARSERUTILS_NOMEM, PARSERUTILS_OK, charset_8859_codec::read_buf, and charset_8859_codec::read_len.
Referenced by charset_8859_codec_create().
| parserutils_error charset_8859_codec_destroy | ( | parserutils_charset_codec * | codec | ) | [static] |
Destroy an ISO-8859-n codec.
| codec | The codec to destroy |
Definition at line 177 of file codec_8859.c.
References PARSERUTILS_OK, and UNUSED.
Referenced by charset_8859_codec_create().
| parserutils_error charset_8859_codec_encode | ( | parserutils_charset_codec * | codec, | |
| const uint8_t ** | source, | |||
| size_t * | sourcelen, | |||
| uint8_t ** | dest, | |||
| size_t * | destlen | |||
| ) | [static] |
Encode a chunk of UCS-4 (big endian) data into ISO-8859-n.
| codec | The codec to use | |
| source | Pointer to pointer to source data | |
| sourcelen | Pointer to length (in bytes) of source data | |
| dest | Pointer to pointer to output buffer | |
| destlen | Pointer to length (in bytes) of output buffer |
Note that, if failure occurs whilst attempting to write any output buffered by the last call, then source and sourcelen will remain unchanged (as nothing more has been read).
sourcelen will be reduced appropriately on exit.
dest will point immediately _after_ the last character written.
destlen will be reduced appropriately on exit.
Definition at line 211 of file codec_8859.c.
References charset_8859_from_ucs4(), endian_big_to_host(), len, PARSERUTILS_NOMEM, PARSERUTILS_OK, charset_8859_codec::write_buf, WRITE_BUFSIZE, and charset_8859_codec::write_len.
Referenced by charset_8859_codec_create().
| bool charset_8859_codec_handles_charset | ( | const char * | charset | ) | [static] |
Determine whether this codec handles a specific charset.
| charset | Charset to test |
Definition at line 98 of file codec_8859.c.
References known_charsets, len, mib, N_ELEMENTS, name, and parserutils_charset_mibenum_from_name().
| parserutils_error charset_8859_codec_output_decoded_char | ( | charset_8859_codec * | c, | |
| uint32_t | ucs4, | |||
| uint8_t ** | dest, | |||
| size_t * | destlen | |||
| ) | [inline, static] |
Output a UCS-4 character (big endian).
| c | Codec to use | |
| ucs4 | UCS-4 character (host endian) | |
| dest | Pointer to pointer to output buffer | |
| destlen | Pointer to output buffer length |
Definition at line 481 of file codec_8859.c.
References endian_host_to_big(), PARSERUTILS_NOMEM, PARSERUTILS_OK, charset_8859_codec::read_buf, and charset_8859_codec::read_len.
Referenced by charset_8859_codec_read_char().
| parserutils_error charset_8859_codec_read_char | ( | charset_8859_codec * | c, | |
| const uint8_t ** | source, | |||
| size_t * | sourcelen, | |||
| uint8_t ** | dest, | |||
| size_t * | destlen | |||
| ) | [inline, static] |
Read an ISO-8859-n character and convert it to UCS-4 (big endian).
| c | The codec | |
| source | Pointer to pointer to source buffer (updated on exit) | |
| sourcelen | Pointer to length of source buffer (updated on exit) | |
| dest | Pointer to pointer to output buffer (updated on exit) | |
| destlen | Pointer to length of output buffer (updated on exit) |
In the case of the result being _INVALID, source will point _at_ the last input character read; nothing will be written or buffered for the failed character. It is up to the client to fix the cause of the failure and retry the decoding process.
sourcelen will be reduced appropriately on exit.
dest will point immediately _after_ the last character written.
destlen will be reduced appropriately on exit.
Definition at line 424 of file codec_8859.c.
References charset_8859_codec::base, charset_8859_codec_output_decoded_char(), charset_8859_to_ucs4(), parserutils_charset_codec::errormode, PARSERUTILS_CHARSET_CODEC_ERROR_STRICT, PARSERUTILS_INVALID, PARSERUTILS_NEEDDATA, PARSERUTILS_NOMEM, and PARSERUTILS_OK.
Referenced by charset_8859_codec_decode().
| parserutils_error charset_8859_codec_reset | ( | parserutils_charset_codec * | codec | ) | [static] |
Clear an ISO-8859-n codec's encoding state.
| codec | The codec to reset |
Definition at line 382 of file codec_8859.c.
References PARSERUTILS_OK, charset_8859_codec::read_buf, charset_8859_codec::read_len, charset_8859_codec::write_buf, and charset_8859_codec::write_len.
Referenced by charset_8859_codec_create().
| parserutils_error charset_8859_from_ucs4 | ( | charset_8859_codec * | c, | |
| uint32_t | ucs4, | |||
| uint8_t ** | s, | |||
| size_t * | len | |||
| ) | [inline, static] |
Convert a UCS-4 (host endian) character to ISO-8859-n.
| c | The codec instance | |
| ucs4 | The UCS-4 character to convert | |
| s | Pointer to pointer to destination buffer | |
| len | Pointer to destination buffer length |
On successful conversion, *s and *len will be updated.
Definition at line 515 of file codec_8859.c.
References charset_8859_codec::base, parserutils_charset_codec::errormode, PARSERUTILS_CHARSET_CODEC_ERROR_STRICT, PARSERUTILS_INVALID, PARSERUTILS_NOMEM, PARSERUTILS_OK, and charset_8859_codec::table.
Referenced by charset_8859_codec_encode().
| parserutils_error charset_8859_to_ucs4 | ( | charset_8859_codec * | c, | |
| const uint8_t * | s, | |||
| size_t | len, | |||
| uint32_t * | ucs4 | |||
| ) | [inline, static] |
Convert an ISO-8859-n character to UCS-4 (host endian).
| c | The codec instance | |
| s | Pointer to source buffer | |
| len | Source buffer length | |
| ucs4 | Pointer to destination buffer |
Definition at line 563 of file codec_8859.c.
References PARSERUTILS_INVALID, PARSERUTILS_NEEDDATA, PARSERUTILS_OK, and charset_8859_codec::table.
Referenced by charset_8859_codec_read_char().
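| const parserutils_charset_handler charset_8859_codec_handler |
Initial value: (initializer not captured in this extract)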
Definition at line 587 of file codec_8859.c.
| struct { ... } known_charsets[] [static] |
| size_t len |
Definition at line 23 of file codec_8859.c.
Referenced by charset_8859_codec_encode(), charset_8859_codec_handles_charset(), charset_ascii_codec_encode(), charset_ext8_codec_encode(), charset_ext8_codec_handles_charset(), charset_utf16_codec_encode(), charset_utf8_codec_encode(), and parserutils_inputstream_peek_slow().
| uint16_t mib |
Definition at line 21 of file codec_8859.c.
Referenced by charset_8859_codec_create(), charset_8859_codec_handles_charset(), charset_ext8_codec_create(), and charset_ext8_codec_handles_charset().
| const char* name |
Definition at line 22 of file codec_8859.c.
Referenced by charset_8859_codec_handles_charset(), and charset_ext8_codec_handles_charset().
| uint32_t* table |
Definition at line 24 of file codec_8859.c.
Referenced by charset_8859_codec_create(), and charset_ext8_codec_create().
1.5.6