/* This file is part of the YAZ toolkit. * Copyright (C) Index Data * See the file LICENSE for details. */ /** * \file * \brief ISO 5426 decoding * * MARC-8 reference: * http://www.loc.gov/marc/specifications/specchariso8.html * * ISO 5426 reference (in German) * Zeichenkonkordanz MAB2-Zeichensatz - ISO/IEC 10646 / Unicode * http://www.d-nb.de/standardisierung/pdf/mab_unic.pdf */ #if HAVE_CONFIG_H #include #endif #include #include #include #include #include "iconv-p.h" struct decoder_data { int g0_mode; int g1_mode; int comb_offset; int comb_size; unsigned long comb_x[8]; size_t comb_no_read[8]; }; yaz_conv_func_t yaz_iso5426_42_conv; yaz_conv_func_t yaz_iso5426_45_conv; yaz_conv_func_t yaz_iso5426_67_conv; yaz_conv_func_t yaz_iso5426_62_conv; yaz_conv_func_t yaz_iso5426_70_conv; yaz_conv_func_t yaz_iso5426_32_conv; yaz_conv_func_t yaz_iso5426_4E_conv; yaz_conv_func_t yaz_iso5426_51_conv; yaz_conv_func_t yaz_iso5426_33_conv; yaz_conv_func_t yaz_iso5426_34_conv; yaz_conv_func_t yaz_iso5426_53_conv; yaz_conv_func_t yaz_iso5426_31_conv; static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd, struct decoder_data *data, unsigned char *inp, size_t inbytesleft, size_t *no_read, int *comb); static unsigned long read_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read) { struct decoder_data *data = (struct decoder_data *) d->data; unsigned long x; if (data->comb_offset < data->comb_size) { *no_read = data->comb_no_read[data->comb_offset]; x = data->comb_x[data->comb_offset]; /* special case for double-diacritic combining characters, INVERTED BREVE and DOUBLE TILDE. We'll increment the no_read counter by 1, since we want to skip over the processing of the closing ligature character */ /* this code is no longer necessary.. our handlers code in yaz_iso5426_?_conv (generated by charconv.tcl) now returns 0 and no_read=1 when a sequence does not match the input. The SECOND HALFs in codetables.xml produces a non-existant entry in the conversion trie.. Hence when met, the input byte is skipped as it should (in yaz_iconv) */ #if 0 if (x == 0x0361 || x == 0x0360) *no_read += 1; #endif data->comb_offset++; return x; } data->comb_offset = 0; for (data->comb_size = 0; data->comb_size < 8; data->comb_size++) { int comb = 0; if (inbytesleft == 0 && data->comb_size) { yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL); x = 0; *no_read = 0; break; } x = yaz_read_iso5426_comb(cd, data, inp, inbytesleft, no_read, &comb); if (!comb || !x) break; data->comb_x[data->comb_size] = x; data->comb_no_read[data->comb_size] = *no_read; inp += *no_read; inbytesleft = inbytesleft - *no_read; } return x; } static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd, struct decoder_data *data, unsigned char *inp, size_t inbytesleft, size_t *no_read, int *comb) { *no_read = 0; while (inbytesleft > 0 && *inp == 27) { int *modep = &data->g0_mode; size_t inbytesleft0 = inbytesleft; inbytesleft--; inp++; if (inbytesleft == 0) goto incomplete; if (*inp == '$') /* set with multiple bytes */ { inbytesleft--; inp++; } if (inbytesleft == 0) goto incomplete; if (*inp == '(' || *inp == ',') /* G0 */ { inbytesleft--; inp++; } else if (*inp == ')' || *inp == '-') /* G1 */ { inbytesleft--; inp++; modep = &data->g1_mode; } if (inbytesleft == 0) goto incomplete; if (*inp == '!') /* ANSEL is a special case */ { inbytesleft--; inp++; } if (inbytesleft == 0) goto incomplete; *modep = *inp++; /* Final character */ inbytesleft--; (*no_read) += inbytesleft0 - inbytesleft; } if (inbytesleft == 0) return 0; else if (*inp == ' ') { *no_read += 1; return ' '; } else { unsigned long x; size_t no_read_sub = 0; int mode = *inp < 128 ? data->g0_mode : data->g1_mode; *comb = 0; switch(mode) { case 'B': /* Basic ASCII */ case 's': /* ASCII */ x = yaz_iso5426_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case 'E': /* ANSEL */ x = yaz_iso5426_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128); break; default: *no_read = 0; yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ); return 0; } *no_read += no_read_sub; return x; } incomplete: *no_read = 0; yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL); return 0; } static size_t init_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read) { struct decoder_data *data = (struct decoder_data *) d->data; data->g0_mode = 'B'; data->g1_mode = 'E'; data->comb_offset = data->comb_size = 0; return 0; } void destroy_iso5426(yaz_iconv_decoder_t d) { struct decoder_data *data = (struct decoder_data *) d->data; xfree(data); } yaz_iconv_decoder_t yaz_iso5426_decoder(const char *fromcode, yaz_iconv_decoder_t d) { if (!yaz_matchstr(fromcode, "ISO5426")) d->read_handle = read_iso5426; else return 0; { struct decoder_data *data = (struct decoder_data *) xmalloc(sizeof(*data)); d->data = data; d->init_handle = init_iso5426; d->destroy_handle = destroy_iso5426; } return d; } /* * Local variables: * c-basic-offset: 4 * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */