/* $NetBSD: citrus_hz.c,v 1.4 2014/06/24 22:24:18 spz Exp $ */ /*- * Copyright (c)2004, 2006 Citrus Project, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #if defined(LIBC_SCCS) && !defined(lint) __RCSID("$NetBSD: citrus_hz.c,v 1.4 2014/06/24 22:24:18 spz Exp $"); #endif /* LIBC_SCCS and not lint */ #include #include #include #include #include #include #include #include #include #include #include "citrus_namespace.h" #include "citrus_types.h" #include "citrus_bcs.h" #include "citrus_module.h" #include "citrus_ctype.h" #include "citrus_stdenc.h" #include "citrus_hz.h" #include "citrus_prop.h" /* * wchar_t mapping: * * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx */ #define ESCAPE_CHAR '~' typedef enum { CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4 } charset_t; typedef struct { int start, end, width; } range_t; static const range_t ranges[] = { #define RANGE(start, end) { start, end, (end - start) + 1 } /* CTRL */ RANGE(0x00, 0x1F), /* ASCII */ RANGE(0x20, 0x7F), /* GB2312 */ RANGE(0x21, 0x7E), /* CS94 */ RANGE(0x21, 0x7E), /* CS96 */ RANGE(0x20, 0x7F), #undef RANGE }; typedef struct escape_t escape_t; typedef struct { charset_t charset; size_t length; #define ROWCOL_MAX 3 escape_t *escape; } graphic_t; typedef TAILQ_HEAD(escape_list, escape_t) escape_list; struct escape_t { TAILQ_ENTRY(escape_t) entry; int ch; graphic_t *left, *right; escape_list *set; }; #define GL(escape) ((escape)->left) #define GR(escape) ((escape)->right) #define SET(escape) ((escape)->set) #define ESC(escape) ((escape)->ch) #define INIT(escape) (TAILQ_FIRST(SET(escape))) static __inline escape_t * find_escape(escape_list *set, int ch) { escape_t *escape; _DIAGASSERT(set != NULL); TAILQ_FOREACH(escape, set, entry) { if (ESC(escape) == ch) break; } return escape; } typedef struct { escape_list e0, e1; graphic_t *ascii, *gb2312; } _HZEncodingInfo; #define E0SET(ei) (&(ei)->e0) #define E1SET(ei) (&(ei)->e1) #define INIT0(ei) (TAILQ_FIRST(E0SET(ei))) #define INIT1(ei) (TAILQ_FIRST(E1SET(ei))) typedef struct { int chlen; char ch[ROWCOL_MAX]; escape_t *inuse; } _HZState; typedef struct { _HZEncodingInfo ei; struct { /* for future multi-locale facility */ _HZState s_mblen; _HZState s_mbrlen; _HZState s_mbrtowc; _HZState s_mbtowc; _HZState s_mbsrtowcs; _HZState s_mbsnrtowcs; _HZState s_wcrtomb; _HZState s_wcsrtombs; _HZState s_wcsnrtombs; _HZState s_wctomb; } states; } _HZCTypeInfo; #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ #define _FUNCNAME(m) _citrus_HZ_##m #define _ENCODING_INFO _HZEncodingInfo #define _CTYPE_INFO _HZCTypeInfo #define _ENCODING_STATE _HZState #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX #define _ENCODING_IS_STATE_DEPENDENT 1 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL) static __inline void _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei, _HZState * __restrict psenc) { _DIAGASSERT(ei != NULL); _DIAGASSERT(psenc != NULL); psenc->chlen = 0; psenc->inuse = INIT0(ei); } static __inline void /*ARGSUSED*/ _citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei, void *__restrict pspriv, const _HZState * __restrict psenc) { /* ei may be unused */ _DIAGASSERT(pspriv != NULL); _DIAGASSERT(psenc != NULL); memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); } static __inline void /*ARGSUSED*/ _citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei, _HZState * __restrict psenc, const void * __restrict pspriv) { /* ei may be unused */ _DIAGASSERT(psenc != NULL); _DIAGASSERT(pspriv != NULL); memcpy((void *)psenc, pspriv, sizeof(*psenc)); } static int _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei, wchar_t * __restrict pwc, const char ** __restrict s, size_t n, _HZState * __restrict psenc, size_t * __restrict nresult) { const char *s0; wchar_t wc; int bit, head, tail, len, ch; graphic_t *graphic; escape_t *candidate, *init; const range_t *range; _DIAGASSERT(ei != NULL); /* pwc may be null */ _DIAGASSERT(s != NULL); _DIAGASSERT(psenc != NULL); _DIAGASSERT(nresult != NULL); if (*s == NULL) { _citrus_HZ_init_state(ei, psenc); *nresult = 1; return 0; } s0 = *s; if (psenc->chlen < 0 || psenc->inuse == NULL) return EINVAL; wc = (wchar_t)0; bit = head = tail = 0; graphic = NULL; for (len = 0; len <= MB_LEN_MAX; /**/) { if (psenc->chlen == tail) { if (n-- < 1) { *s = s0; *nresult = (size_t)-2; return 0; } psenc->ch[psenc->chlen++] = *s0++; ++len; } ch = (unsigned char)psenc->ch[tail++]; if (tail == 1) { if ((ch & ~0x80) <= 0x1F) { if (psenc->inuse != INIT0(ei)) break; wc = (wchar_t)ch; goto done; } if (ch & 0x80) { graphic = GR(psenc->inuse); bit = 0x80; ch &= ~0x80; } else { graphic = GL(psenc->inuse); if (ch == ESCAPE_CHAR) continue; bit = 0x0; } if (graphic == NULL) break; } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) { if (tail < psenc->chlen) return EINVAL; if (ch == ESCAPE_CHAR) { ++head; } else if (ch == '\n') { if (psenc->inuse != INIT0(ei)) break; tail = psenc->chlen = 0; continue; } else { candidate = NULL; init = INIT0(ei); _DIAGASSERT(init != NULL); if (psenc->inuse == init) { init = INIT1(ei); } else if (INIT(psenc->inuse) == init) { if (ESC(init) != ch) break; candidate = init; } if (candidate == NULL) { candidate = find_escape( SET(psenc->inuse), ch); if (candidate == NULL) { if (init == NULL || ESC(init) != ch) break; candidate = init; } } psenc->inuse = candidate; tail = psenc->chlen = 0; continue; } } else if (ch & 0x80) { if (graphic != GR(psenc->inuse)) break; ch &= ~0x80; } else { if (graphic != GL(psenc->inuse)) break; } _DIAGASSERT(graphic != NULL); range = &ranges[(size_t)graphic->charset]; if (range->start > ch || range->end < ch) break; wc <<= 8; wc |= ch; if (graphic->length == (tail - head)) { if (graphic->charset > GB2312) bit |= ESC(psenc->inuse) << 24; wc |= bit; goto done; } } *nresult = (size_t)-1; return EILSEQ; done: if (tail < psenc->chlen) return EINVAL; *s = s0; if (pwc != NULL) *pwc = wc; psenc->chlen = 0; *nresult = (wc == 0) ? 0 : len; return 0; } static int _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei, char * __restrict s, size_t n, wchar_t wc, _HZState * __restrict psenc, size_t * __restrict nresult) { int bit, ch; escape_t *candidate, *init; graphic_t *graphic; size_t len; const range_t *range; _DIAGASSERT(ei != NULL); _DIAGASSERT(s != NULL); _DIAGASSERT(psenc != NULL); _DIAGASSERT(nresult != NULL); if (psenc->chlen != 0 || psenc->inuse == NULL) return EINVAL; if (wc & 0x80) { bit = 0x80; wc &= ~0x80; } else { bit = 0x0; } if ((uint32_t)wc <= 0x1F) { candidate = INIT0(ei); graphic = (bit == 0) ? candidate->left : candidate->right; if (graphic == NULL) goto ilseq; range = &ranges[(size_t)CTRL]; len = 1; } else if ((uint32_t)wc <= 0x7F) { graphic = ei->ascii; if (graphic == NULL) goto ilseq; candidate = graphic->escape; range = &ranges[(size_t)graphic->charset]; len = graphic->length; } else if ((uint32_t)wc <= 0x7F7F) { graphic = ei->gb2312; if (graphic == NULL) goto ilseq; candidate = graphic->escape; range = &ranges[(size_t)graphic->charset]; len = graphic->length; } else { ch = (wc >> 24) & 0xFF; candidate = find_escape(E0SET(ei), ch); if (candidate == NULL) { candidate = find_escape(E1SET(ei), ch); if (candidate == NULL) goto ilseq; } wc &= ~0xFF000000; graphic = (bit == 0) ? candidate->left : candidate->right; if (graphic == NULL) goto ilseq; range = &ranges[(size_t)graphic->charset]; len = graphic->length; } if (psenc->inuse != candidate) { init = INIT0(ei); if (SET(psenc->inuse) == SET(candidate)) { if (INIT(psenc->inuse) != init || psenc->inuse == init || candidate == init) init = NULL; } else if (candidate == (init = INIT(candidate))) { init = NULL; } if (init != NULL) { if (n < 2) return E2BIG; n -= 2; psenc->ch[psenc->chlen++] = ESCAPE_CHAR; psenc->ch[psenc->chlen++] = ESC(init); } if (n < 2) return E2BIG; n -= 2; psenc->ch[psenc->chlen++] = ESCAPE_CHAR; psenc->ch[psenc->chlen++] = ESC(candidate); psenc->inuse = candidate; } if (n < len) return E2BIG; while (len-- > 0) { ch = (wc >> (len * 8)) & 0xFF; if (range->start > ch || range->end < ch) goto ilseq; psenc->ch[psenc->chlen++] = ch | bit; } memcpy(s, psenc->ch, psenc->chlen); *nresult = psenc->chlen; psenc->chlen = 0; return 0; ilseq: *nresult = (size_t)-1; return EILSEQ; } static __inline int _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei, char * __restrict s, size_t n, _HZState * __restrict psenc, size_t * __restrict nresult) { escape_t *candidate; _DIAGASSERT(ei != NULL); _DIAGASSERT(s != NULL); _DIAGASSERT(psenc != NULL); _DIAGASSERT(nresult != NULL); if (psenc->chlen != 0 || psenc->inuse == NULL) return EINVAL; candidate = INIT0(ei); if (psenc->inuse != candidate) { if (n < 2) return E2BIG; n -= 2; psenc->ch[psenc->chlen++] = ESCAPE_CHAR; psenc->ch[psenc->chlen++] = ESC(candidate); } if (n < 1) return E2BIG; if (psenc->chlen > 0) memcpy(s, psenc->ch, psenc->chlen); *nresult = psenc->chlen; _citrus_HZ_init_state(ei, psenc); return 0; } static __inline int _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei, _HZState * __restrict psenc, int * __restrict rstate) { _DIAGASSERT(ei != NULL); _DIAGASSERT(psenc != NULL); _DIAGASSERT(rstate != NULL); if (psenc->chlen < 0 || psenc->inuse == NULL) return EINVAL; *rstate = (psenc->chlen == 0) ? ((psenc->inuse == INIT0(ei)) ? _STDENC_SDGEN_INITIAL : _STDENC_SDGEN_STABLE) : ((psenc->ch[0] == ESCAPE_CHAR) ? _STDENC_SDGEN_INCOMPLETE_SHIFT : _STDENC_SDGEN_INCOMPLETE_CHAR); return 0; } static __inline int /*ARGSUSED*/ _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei, _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) { int bit; _DIAGASSERT(csid != NULL); _DIAGASSERT(idx != NULL); if (wc & 0x80) { bit = 0x80; wc &= ~0x80; } else { bit = 0x0; } if ((uint32_t)wc <= 0x7F) { *csid = (_csid_t)bit; *idx = (_index_t)wc; } else if ((uint32_t)wc <= 0x7F7F) { *csid = (_csid_t)(bit | 0x8000); *idx = (_index_t)wc; } else { *csid = (_index_t)(wc & ~0x00FFFF7F); *idx = (_csid_t)(wc & 0x00FFFF7F); } return 0; } static __inline int /*ARGSUSED*/ _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei, wchar_t * __restrict wc, _csid_t csid, _index_t idx) { _DIAGASSERT(ei != NULL); _DIAGASSERT(wc != NULL); *wc = (wchar_t)idx; switch (csid) { case 0x80: case 0x8080: *wc |= (wchar_t)0x80; /*FALLTHROUGH*/ case 0x0: case 0x8000: break; default: *wc |= (wchar_t)csid; } return 0; } static void _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei) { escape_t *escape; _DIAGASSERT(ei != NULL); while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) { TAILQ_REMOVE(E0SET(ei), escape, entry); free(GL(escape)); free(GR(escape)); free(escape); } while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) { TAILQ_REMOVE(E1SET(ei), escape, entry); free(GL(escape)); free(GR(escape)); free(escape); } } static int _citrus_HZ_parse_char(void *context, const char *name, const char *s) { void **p; escape_t *escape; _DIAGASSERT(context != NULL && *context != NULL); _DIAGASSERT(name != NULL); _DIAGASSERT(s != NULL); p = (void **)context; escape = (escape_t *)p[0]; if (escape->ch != '\0') return EINVAL; escape->ch = *s++; if (escape->ch == ESCAPE_CHAR || *s != '\0') return EINVAL; return 0; } static int _citrus_HZ_parse_graphic(void *context, const char *name, const char *s) { void **p; _HZEncodingInfo *ei; escape_t *escape; graphic_t *graphic; _DIAGASSERT(context != NULL && *context != NULL); _DIAGASSERT(name != NULL); _DIAGASSERT(s != NULL); p = (void **)context; escape = (escape_t *)p[0]; ei = (_HZEncodingInfo *)p[1]; graphic = malloc(sizeof(*graphic)); if (graphic == NULL) return ENOMEM; memset(graphic, 0, sizeof(*graphic)); if (strcmp("GL", name) == 0) { if (GL(escape) != NULL) goto release; GL(escape) = graphic; } else if (strcmp("GR", name) == 0) { if (GR(escape) != NULL) goto release; GR(escape) = graphic; } else { release: free(graphic); return EINVAL; } graphic->escape = escape; if (_bcs_strncasecmp("ASCII", s, 5) == 0) { if (s[5] != '\0') return EINVAL; graphic->charset = ASCII; graphic->length = 1; ei->ascii = graphic; return 0; } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) { if (s[6] != '\0') return EINVAL; graphic->charset = GB2312; graphic->length = 2; ei->gb2312 = graphic; return 0; } else if (strncmp("94*", s, 3) == 0) { graphic->charset = CS94; } else if (strncmp("96*", s, 3) == 0) { graphic->charset = CS96; } else { return EINVAL; } s += 3; switch(*s) { case '1': case '2': case '3': graphic->length = (size_t)(*s - '0'); if (*++s == '\0') break; /*FALLTHROUGH*/ default: return EINVAL; } return 0; } static const _citrus_prop_hint_t escape_hints[] = { _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char), _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic), _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic), _CITRUS_PROP_HINT_END }; static int _citrus_HZ_parse_escape(void *context, const char *name, const char *s) { _HZEncodingInfo *ei; escape_t *escape; void *p[2]; _DIAGASSERT(context != NULL); _DIAGASSERT(name != NULL); _DIAGASSERT(s != NULL); ei = (_HZEncodingInfo *)context; escape = malloc(sizeof(*escape)); if (escape == NULL) return EINVAL; memset(escape, 0, sizeof(*escape)); if (strcmp("0", name) == 0) { escape->set = E0SET(ei); TAILQ_INSERT_TAIL(E0SET(ei), escape, entry); } else if (strcmp("1", name) == 0) { escape->set = E1SET(ei); TAILQ_INSERT_TAIL(E1SET(ei), escape, entry); } else { free(escape); return EINVAL; } p[0] = (void *)escape; p[1] = (void *)ei; return _citrus_prop_parse_variable( escape_hints, (void *)&p[0], s, strlen(s)); } static const _citrus_prop_hint_t root_hints[] = { _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape), _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape), _CITRUS_PROP_HINT_END }; static int _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei, const void * __restrict var, size_t lenvar) { int errnum; _DIAGASSERT(ei != NULL); memset(ei, 0, sizeof(*ei)); TAILQ_INIT(E0SET(ei)); TAILQ_INIT(E1SET(ei)); errnum = _citrus_prop_parse_variable( root_hints, (void *)ei, var, lenvar); if (errnum != 0) _citrus_HZ_encoding_module_uninit(ei); return errnum; } /* ---------------------------------------------------------------------- * public interface for ctype */ _CITRUS_CTYPE_DECLS(HZ); _CITRUS_CTYPE_DEF_OPS(HZ); #include "citrus_ctype_template.h" /* ---------------------------------------------------------------------- * public interface for stdenc */ _CITRUS_STDENC_DECLS(HZ); _CITRUS_STDENC_DEF_OPS(HZ); #include "citrus_stdenc_template.h"