#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
Go to the source code of this file.
Defines | |
| #define | UTF8_TO_UCS4(s, len, ucs4, clen, error) |
| Convert a UTF-8 multibyte sequence into a single UCS-4 character. | |
| #define | UTF8_FROM_UCS4(ucs4, s, len, error) |
| Convert a single UCS-4 character into a UTF-8 multibyte sequence. | |
| #define | UTF8_LENGTH(s, max, len, error) |
| Calculate the length (in characters) of a bounded UTF-8 string. | |
| #define | UTF8_CHAR_BYTE_LENGTH(s, len, error) |
| Calculate the length (in bytes) of a UTF-8 character. | |
| #define | UTF8_PREV(s, off, prevoff, error) |
| Find previous legal UTF-8 char in string. | |
| #define | UTF8_NEXT(s, len, off, nextoff, error) |
| Find next legal UTF-8 char in string. | |
| #define | UTF8_NEXT_PARANOID(s, len, off, nextoff, error) |
| Skip to start of next sequence in UTF-8 input. | |
Variables | |
| const uint8_t | numContinuations [256] |
| Number of continuation bytes for a given start byte. | |
Definition in file utf8impl.h.
#define UTF8_CHAR_BYTE_LENGTH(s, len, error)
Value:
do { \
    if (s == NULL || len == NULL) { \
        error = PARSERUTILS_BADPARM; \
        break; \
    } \
    \
    *len = numContinuations[s[0]] + 1 /* Start byte */; \
    \
    error = PARSERUTILS_OK; \
} while(0)
| s | Pointer to start of character | |
| len | Pointer to location to receive length | |
| error | Location to receive error code |
Definition at line 228 of file utf8impl.h.
Referenced by parserutils_charset_utf8_char_byte_length().
#define UTF8_FROM_UCS4(ucs4, s, len, error)
Convert a single UCS-4 character into a UTF-8 multibyte sequence.
RFC 3629 removed the encoding of UCS values outside the UTF-16 range (above U+10FFFF) from UTF-8. This macro, however, conforms to the older RFC 2279 and therefore still encodes the full range 0 <= c <= 0x7FFFFFFF.
| ucs4 | The character to process (0 <= c <= 0x7FFFFFFF) (host endian) | |
| s | Pointer to pointer to output buffer, updated on exit | |
| len | Pointer to length, in bytes, of output buffer, updated on exit | |
| error | Location to receive error code |
Definition at line 123 of file utf8impl.h.
Referenced by charset_utf8_codec_encode(), and parserutils_charset_utf8_from_ucs4().
#define UTF8_LENGTH(s, max, len, error)
Calculate the length (in characters) of a bounded UTF-8 string.
| s | The string | |
| max | Maximum length | |
| len | Pointer to location to receive length of string | |
| error | Location to receive error code |
Definition at line 182 of file utf8impl.h.
Referenced by parserutils_charset_utf8_length().
#define UTF8_NEXT(s, len, off, nextoff, error)
Value:
do { \
    if (s == NULL || off >= len || nextoff == NULL) { \
        error = PARSERUTILS_BADPARM; \
        break; \
    } \
    \
    /* Skip current start byte (if present - may be mid-sequence) */ \
    if (s[off] < 0x80 || (s[off] & 0xC0) == 0xC0) \
        off++; \
    \
    while (off < len && (s[off] & 0xC0) == 0x80) \
        off++; \
    \
    *nextoff = off; \
    \
    error = PARSERUTILS_OK; \
} while(0)
| s | The string (assumed valid) | |
| len | Maximum offset in string | |
| off | Offset in the string to start at | |
| nextoff | Pointer to location to receive offset of first byte of next legal character | |
| error | Location to receive error code |
Definition at line 274 of file utf8impl.h.
Referenced by parserutils_charset_utf8_next().
#define UTF8_NEXT_PARANOID(s, len, off, nextoff, error)
Skip to start of next sequence in UTF-8 input.
| s | The string (assumed to be of dubious validity) | |
| len | Maximum offset in string | |
| off | Offset in the string to start at | |
| nextoff | Pointer to location to receive offset of first byte of next legal character | |
| error | Location to receive error code |
Definition at line 303 of file utf8impl.h.
Referenced by charset_utf8_codec_read_char(), and parserutils_charset_utf8_next_paranoid().
#define UTF8_PREV(s, off, prevoff, error)
Value:
do { \
    if (s == NULL || prevoff == NULL) { \
        error = PARSERUTILS_BADPARM; \
        break; \
    } \
    \
    while (off != 0 && (s[--off] & 0xC0) == 0x80) \
        /* do nothing */; \
    \
    *prevoff = off; \
    \
    error = PARSERUTILS_OK; \
} while(0)
| s | The string | |
| off | Offset in the string to start at | |
| prevoff | Pointer to location to receive offset of first byte of previous legal character | |
| error | Location to receive error code |
Definition at line 249 of file utf8impl.h.
Referenced by parserutils_charset_utf8_prev().
#define UTF8_TO_UCS4(s, len, ucs4, clen, error)
Convert a UTF-8 multibyte sequence into a single UCS-4 character.
RFC 3629 removed the encoding of UCS values outside the UTF-16 range (above U+10FFFF) from UTF-8. This macro, however, conforms to the older RFC 2279.
| s | The sequence to process | |
| len | Length of sequence | |
| ucs4 | Pointer to location to receive UCS-4 character (host endian) | |
| clen | Pointer to location to receive byte length of UTF-8 sequence | |
| error | Location to receive error code |
Definition at line 34 of file utf8impl.h.
Referenced by charset_utf8_codec_read_char(), and parserutils_charset_utf8_to_ucs4().
| const uint8_t numContinuations[256] |
1.5.6