/*
 * BRLTTY - A background process providing access to the console screen (when in
 *          text mode) for a blind person using a refreshable braille display.
 *
 * Copyright (C) 1995-2021 by The BRLTTY Developers.
 *
 * BRLTTY comes with ABSOLUTELY NO WARRANTY.
 *
 * This is free software, placed under the terms of the
 * GNU Lesser General Public License, as published by the Free Software
 * Foundation; either version 2.1 of the License, or (at your option) any
 * later version. Please see the file LICENSE-LGPL for details.
 *
 * Web Page: http://brltty.app/
 *
 * This software is maintained by Dave Mielke <dave@mielke.cc>.
 */

#include "prologue.h"

#include <string.h>

#include "log.h"
#include "ctb_translate.h"
#include "ttb.h"
#include "brl_dots.h"
#include "unicode.h"
#include "utf8.h"

#ifdef HAVE_ICU
#include <unicode/uchar.h>

/* Incremental state for the UAX #14 based line break scanner.
 *
 * index    - next character position to classify
 * after    - line break class of the most recently classified character
 * before   - line break class of the character preceding it
 * previous - line break class of the character preceding "before"
 * indirect - most recent "before" class that was not a space
 *            (used by the rules that apply across intervening spaces)
 */
typedef struct {
  unsigned int index;
  ULineBreak after;
  ULineBreak before;
  ULineBreak previous;
  ULineBreak indirect;
} LineBreakOpportunitiesState;

/* Reset the scanner so that it starts at position 0 with every remembered
 * class initialized to U_LB_SPACE.
 */
static void
prepareLineBreakOpportunitiesState (LineBreakOpportunitiesState *lbo) {
  lbo->index = 0;
  lbo->after = U_LB_SPACE;
  lbo->before = lbo->after;
  lbo->previous = lbo->before;
  lbo->indirect = U_LB_SPACE;
}

/* Extend the line break analysis up to and including position "limit".
 *
 * For each position from lbo->index through limit, opportunities[position]
 * is set to 1 if a line may be broken immediately before
 * characters[position], and to 0 otherwise. Positions that were handled by
 * an earlier call are not revisited.
 *
 * Both characters[limit] and opportunities[limit] are accessed, so the
 * caller must make sure that "limit" is a valid index into both arrays.
 * The bcd argument is not used by this implementation.
 */
static void
findLineBreakOpportunities (
  BrailleContractionData *bcd,
  LineBreakOpportunitiesState *lbo,
  unsigned char *opportunities,
  const wchar_t *characters, unsigned int limit
) {
  /* UAX #14: Line Breaking Properties
   * http://unicode.org/reports/tr14/
   * Section 6: Line Breaking Algorithm
   *
   * !  Mandatory break at the indicated position
   * ^  No break allowed at the indicated position
   * _  Break allowed at the indicated position
   *
   * H  ideographs
   * h  small kana
   * 9  digits
   */

  while (lbo->index <= limit) {
    unsigned char *opportunity = &opportunities[lbo->index];

    lbo->previous = lbo->before;
    lbo->before = lbo->after;
    lbo->after = u_getIntPropertyValue(characters[lbo->index], UCHAR_LINE_BREAK);
    lbo->index += 1;

    /* LB9  Do not break a combining character sequence.
     */
    if (lbo->after == U_LB_COMBINING_MARK) {
      /* LB10: Treat any remaining combining mark as AL.
       */
      if ((lbo->before == U_LB_MANDATORY_BREAK) ||
          (lbo->before == U_LB_CARRIAGE_RETURN) ||
          (lbo->before == U_LB_LINE_FEED) ||
          (lbo->before == U_LB_NEXT_LINE) ||
          (lbo->before == U_LB_SPACE) ||
          (lbo->before == U_LB_ZWSPACE)) {
        lbo->before = U_LB_ALPHABETIC;
      }

      /* treat it as if it has the line breaking class of the base character
       */
      lbo->after = lbo->before;
      *opportunity = 0;
      continue;
    }

    if (lbo->before != U_LB_SPACE) lbo->indirect = lbo->before;

    /* LB2: Never break at the start of text.
     * sot ×
     */
    if (opportunity == opportunities) {
      *opportunity = 0;
      continue;
    }

    /* LB4: Always break after hard line breaks
     * BK !
     */
    if (lbo->before == U_LB_MANDATORY_BREAK) {
      *opportunity = 1;
      continue;
    }

    /* LB5: Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
     * CR ^ LF
     * CR !
     * LF !
     * NL !
     */
    if ((lbo->before == U_LB_CARRIAGE_RETURN) && (lbo->after == U_LB_LINE_FEED)) {
      *opportunity = 0;
      continue;
    }
    if ((lbo->before == U_LB_CARRIAGE_RETURN) ||
        (lbo->before == U_LB_LINE_FEED) ||
        (lbo->before == U_LB_NEXT_LINE)) {
      *opportunity = 1;
      continue;
    }

    /* LB6: Do not break before hard line breaks.
     * ^ ( BK | CR | LF | NL )
     */
    if ((lbo->after == U_LB_MANDATORY_BREAK) ||
        (lbo->after == U_LB_CARRIAGE_RETURN) ||
        (lbo->after == U_LB_LINE_FEED) ||
        (lbo->after == U_LB_NEXT_LINE)) {
      *opportunity = 0;
      continue;
    }

    /* LB7: Do not break before spaces or zero width space.
     * ^ SP
     * ^ ZW
     */
    if ((lbo->after == U_LB_SPACE) || (lbo->after == U_LB_ZWSPACE)) {
      *opportunity = 0;
      continue;
    }

    /* LB8: Break after zero width space.
     * ZW _
     */
    if (lbo->before == U_LB_ZWSPACE) {
      *opportunity = 1;
      continue;
    }

    /* LB11: Do not break before or after Word joiner and related characters.
     * ^ WJ
     * WJ ^
     */
    if ((lbo->before == U_LB_WORD_JOINER) || (lbo->after == U_LB_WORD_JOINER)) {
      *opportunity = 0;
      continue;
    }

    /* LB12: Do not break before or after NBSP and related characters.
     * [^SP] ^ GL
     * GL ^
     */
    if ((lbo->before != U_LB_SPACE) && (lbo->after == U_LB_GLUE)) {
      *opportunity = 0;
      continue;
    }
    if (lbo->before == U_LB_GLUE) {
      *opportunity = 0;
      continue;
    }

    /* LB13: Do not break before ‘]' or ‘!' or ‘;' or ‘/', even after spaces.
     * ^ CL
     * ^ EX
     * ^ IS
     * ^ SY
     */
    if ((lbo->after == U_LB_CLOSE_PUNCTUATION) ||
        (lbo->after == U_LB_EXCLAMATION) ||
        (lbo->after == U_LB_INFIX_NUMERIC) ||
        (lbo->after == U_LB_BREAK_SYMBOLS)) {
      *opportunity = 0;
      continue;
    }

    /* LB14: Do not break after ‘[', even after spaces.
     * OP SP* ^
     */
    if (lbo->indirect == U_LB_OPEN_PUNCTUATION) {
      *opportunity = 0;
      continue;
    }

    /* LB15: Do not break within ‘"[', even with intervening spaces.
     * QU SP* ^ OP
     */
    if ((lbo->indirect == U_LB_QUOTATION) && (lbo->after == U_LB_OPEN_PUNCTUATION)) {
      *opportunity = 0;
      continue;
    }

    /* LB16: Do not break within ‘]h', even with intervening spaces.
     * CL SP* ^ NS
     */
    if ((lbo->indirect == U_LB_CLOSE_PUNCTUATION) && (lbo->after == U_LB_NONSTARTER)) {
      *opportunity = 0;
      continue;
    }

    /* LB17: Do not break within ‘——', even with intervening spaces.
     * B2 SP* ^ B2
     */
    if ((lbo->indirect == U_LB_BREAK_BOTH) && (lbo->after == U_LB_BREAK_BOTH)) {
      *opportunity = 0;
      continue;
    }

    /* LB18: Break after spaces.
     * SP _
     */
    if (lbo->before == U_LB_SPACE) {
      *opportunity = 1;
      continue;
    }

    /* LB19: Do not break before or after quotation marks.
     * ^ QU
     * QU ^
     */
    if ((lbo->before == U_LB_QUOTATION) || (lbo->after == U_LB_QUOTATION)) {
      *opportunity = 0;
      continue;
    }

    /* LB20: Break before and after unresolved.
     * _ CB
     * CB _
     */
    if ((lbo->after == U_LB_CONTINGENT_BREAK) || (lbo->before == U_LB_CONTINGENT_BREAK)) {
      *opportunity = 1;
      continue;
    }

    /* LB21: Do not break before hyphen-minus, other hyphens,
     *       fixed-width spaces, small kana, and other non-starters,
     *       or after acute accents.
     * ^ BA
     * ^ HY
     * ^ NS
     * BB ^
     */
    if ((lbo->after == U_LB_BREAK_AFTER) ||
        (lbo->after == U_LB_HYPHEN) ||
        (lbo->after == U_LB_NONSTARTER) ||
        (lbo->before == U_LB_BREAK_BEFORE)) {
      *opportunity = 0;
      continue;
    }

    /* LB22: Do not break between two ellipses,
     *       or between letters or numbers and ellipsis.
     * AL ^ IN
     * ID ^ IN
     * IN ^ IN
     * NU ^ IN
     */
    if ((lbo->after == U_LB_INSEPARABLE) &&
        ((lbo->before == U_LB_ALPHABETIC) ||
         (lbo->before == U_LB_IDEOGRAPHIC) ||
         (lbo->before == U_LB_INSEPARABLE) ||
         (lbo->before == U_LB_NUMERIC))) {
      *opportunity = 0;
      continue;
    }

    /* LB23: Do not break within ‘a9', ‘3a', or ‘H%'.
     * ID ^ PO
     * AL ^ NU
     * NU ^ AL
     */
    if (((lbo->before == U_LB_IDEOGRAPHIC) && (lbo->after == U_LB_POSTFIX_NUMERIC)) ||
        ((lbo->before == U_LB_ALPHABETIC) && (lbo->after == U_LB_NUMERIC)) ||
        ((lbo->before == U_LB_NUMERIC) && (lbo->after == U_LB_ALPHABETIC))) {
      *opportunity = 0;
      continue;
    }

    /* LB24: Do not break between prefix and letters or ideographs.
     * PR ^ ID
     * PR ^ AL
     * PO ^ AL
     */
    if (((lbo->before == U_LB_PREFIX_NUMERIC) && (lbo->after == U_LB_IDEOGRAPHIC)) ||
        ((lbo->before == U_LB_PREFIX_NUMERIC) && (lbo->after == U_LB_ALPHABETIC)) ||
        ((lbo->before == U_LB_POSTFIX_NUMERIC) && (lbo->after == U_LB_ALPHABETIC))) {
      *opportunity = 0;
      continue;
    }

    /* LB25: Do not break between the following pairs of classes relevant to numbers:
     * CL ^ PO
     * CL ^ PR
     * NU ^ PO
     * NU ^ PR
     * PO ^ OP
     * PO ^ NU
     * PR ^ OP
     * PR ^ NU
     * HY ^ NU
     * IS ^ NU
     * NU ^ NU
     * SY ^ NU
     */
    if (((lbo->before == U_LB_CLOSE_PUNCTUATION) && (lbo->after == U_LB_POSTFIX_NUMERIC)) ||
        ((lbo->before == U_LB_CLOSE_PUNCTUATION) && (lbo->after == U_LB_PREFIX_NUMERIC)) ||
        ((lbo->before == U_LB_NUMERIC) && (lbo->after == U_LB_POSTFIX_NUMERIC)) ||
        ((lbo->before == U_LB_NUMERIC) && (lbo->after == U_LB_PREFIX_NUMERIC)) ||
        ((lbo->before == U_LB_POSTFIX_NUMERIC) && (lbo->after == U_LB_OPEN_PUNCTUATION)) ||
        ((lbo->before == U_LB_POSTFIX_NUMERIC) && (lbo->after == U_LB_NUMERIC)) ||
        ((lbo->before == U_LB_PREFIX_NUMERIC) && (lbo->after == U_LB_OPEN_PUNCTUATION)) ||
        ((lbo->before == U_LB_PREFIX_NUMERIC) && (lbo->after == U_LB_NUMERIC)) ||
        ((lbo->before == U_LB_HYPHEN) && (lbo->after == U_LB_NUMERIC)) ||
        ((lbo->before == U_LB_INFIX_NUMERIC) && (lbo->after == U_LB_NUMERIC)) ||
        ((lbo->before == U_LB_NUMERIC) && (lbo->after == U_LB_NUMERIC)) ||
        ((lbo->before == U_LB_BREAK_SYMBOLS) && (lbo->after == U_LB_NUMERIC))) {
      *opportunity = 0;
      continue;
    }

    /* LB26: Do not break a Korean syllable.
     * JL ^ (JL | JV | H2 | H3)
     * (JV | H2) ^ (JV | JT)
     * (JT | H3) ^ JT
     */
    if ((lbo->before == U_LB_JL) &&
        ((lbo->after == U_LB_JL) ||
         (lbo->after == U_LB_JV) ||
         (lbo->after == U_LB_H2) ||
         (lbo->after == U_LB_H3))) {
      *opportunity = 0;
      continue;
    }
    if (((lbo->before == U_LB_JV) || (lbo->before == U_LB_H2)) &&
        ((lbo->after == U_LB_JV) || (lbo->after == U_LB_JT))) {
      *opportunity = 0;
      continue;
    }
    if (((lbo->before == U_LB_JT) || (lbo->before == U_LB_H3)) &&
        (lbo->after == U_LB_JT)) {
      *opportunity = 0;
      continue;
    }

    /* LB27: Treat a Korean Syllable Block the same as ID.
     * (JL | JV | JT | H2 | H3) ^ IN
     * (JL | JV | JT | H2 | H3) ^ PO
     * PR ^ (JL | JV | JT | H2 | H3)
     */
    if (((lbo->before == U_LB_JL) || (lbo->before == U_LB_JV) ||
         (lbo->before == U_LB_JT) || (lbo->before == U_LB_H2) ||
         (lbo->before == U_LB_H3)) &&
        (lbo->after == U_LB_INSEPARABLE)) {
      *opportunity = 0;
      continue;
    }
    if (((lbo->before == U_LB_JL) || (lbo->before == U_LB_JV) ||
         (lbo->before == U_LB_JT) || (lbo->before == U_LB_H2) ||
         (lbo->before == U_LB_H3)) &&
        (lbo->after == U_LB_POSTFIX_NUMERIC)) {
      *opportunity = 0;
      continue;
    }
    if ((lbo->before == U_LB_PREFIX_NUMERIC) &&
        ((lbo->after == U_LB_JL) || (lbo->after == U_LB_JV) ||
         (lbo->after == U_LB_JT) || (lbo->after == U_LB_H2) ||
         (lbo->after == U_LB_H3))) {
      *opportunity = 0;
      continue;
    }

    /* LB28: Do not break between alphabetics.
     * AL ^ AL
     */
    if ((lbo->before == U_LB_ALPHABETIC) && (lbo->after == U_LB_ALPHABETIC)) {
      *opportunity = 0;
      continue;
    }

    /* LB29: Do not break between numeric punctuation and alphabetics.
     * IS ^ AL
     */
    if ((lbo->before == U_LB_INFIX_NUMERIC) && (lbo->after == U_LB_ALPHABETIC)) {
      *opportunity = 0;
      continue;
    }

    /* LB30: Do not break between letters, numbers, or ordinary symbols
     *       and opening or closing punctuation.
     * (AL | NU) ^ OP
     * CL ^ (AL | NU)
     */
    if (((lbo->before == U_LB_ALPHABETIC) || (lbo->before == U_LB_NUMERIC)) &&
        (lbo->after == U_LB_OPEN_PUNCTUATION)) {
      *opportunity = 0;
      continue;
    }
    if ((lbo->before == U_LB_CLOSE_PUNCTUATION) &&
        ((lbo->after == U_LB_ALPHABETIC) || (lbo->after == U_LB_NUMERIC))) {
      *opportunity = 0;
      continue;
    }

    /* Unix options begin with a minus sign. */
    if ((lbo->before == U_LB_HYPHEN) &&
        (lbo->after != U_LB_SPACE) &&
        (lbo->previous == U_LB_SPACE)) {
      *opportunity = 0;
      continue;
    }

    /* LB31: Break everywhere else.
     * ALL _
     * _ ALL
     */
    *opportunity = 1;
  }
}

#else /* HAVE_ICU */
#include "ascii.h"

/* Incremental state for the fallback (non-ICU) line break scanner.
 *
 * index    - next character position to classify
 * wasSpace - whether the previously classified character was a space
 */
typedef struct {
  unsigned int index;
  int wasSpace;
} LineBreakOpportunitiesState;

/* Reset the scanner so that it starts at position 0 with no preceding space. */
static void
prepareLineBreakOpportunitiesState (LineBreakOpportunitiesState *lbo) {
  lbo->index = 0;
  lbo->wasSpace = 0;
}

/* Extend the line break analysis up to and including position "limit".
 *
 * A break is allowed before a character exactly when that character is not
 * a space (per CTC_Space) and the one before it is. Both characters[limit]
 * and opportunities[limit] are accessed, so "limit" must be a valid index
 * into both arrays.
 */
static void
findLineBreakOpportunities (
  BrailleContractionData *bcd,
  LineBreakOpportunitiesState *lbo,
  unsigned char *opportunities,
  const wchar_t *characters, unsigned int limit
) {
  while (lbo->index <= limit) {
    int isSpace = testCharacter(bcd, characters[lbo->index], CTC_Space);
    opportunities[lbo->index] = lbo->wasSpace && !isSpace;

    lbo->wasSpace = isSpace;
    lbo->index += 1;
  }
}
#endif /* HAVE_ICU */

/* Return the header of the internal contraction table attached to bcd. */
static inline ContractionTableHeader *
getContractionTableHeader (BrailleContractionData *bcd) {
  return bcd->table->data.internal.header.fields;
}

/* Return the table item located "offset" bytes from the start of the table. */
static inline const void *
getContractionTableItem (BrailleContractionData *bcd, ContractionTableOffset offset) {
  return &bcd->table->data.internal.header.bytes[offset];
}

/* Look up the table's entry for a character.
 *
 * This is a binary search over the table's character array, so it relies on
 * the entries being ordered by ascending value. Returns NULL if the table
 * has no entry for the character.
 */
static const ContractionTableCharacter *
getContractionTableCharacter (BrailleContractionData *bcd, wchar_t character) {
  const ContractionTableCharacter *characters = getContractionTableItem(bcd, getContractionTableHeader(bcd)->characters);
  int first = 0;
  int last = getContractionTableHeader(bcd)->characterCount - 1;

  while (first <= last) {
    int current = (first + last) / 2;
    const ContractionTableCharacter *ctc = &characters[current];

    if (ctc->value < character) {
      first = current + 1;
    } else if (ctc->value > character) {
      last = current - 1;
    } else {
      return ctc;
    }
  }

  return NULL;
}

/* Context passed through the callback interface to setAlwaysRule(). */
typedef struct {
  BrailleContractionData *bcd;
  CharacterEntry *character;
} SetAlwaysRuleData;

/* Callback: if the table defines an "always" rule with a non-empty
 * replacement for the character, store it in the character entry referenced
 * by data (a SetAlwaysRuleData) and return 1. Otherwise return 0 and leave
 * the entry untouched.
 */
static int
setAlwaysRule (wchar_t character, void *data) {
  SetAlwaysRuleData *sar = data;
  const ContractionTableCharacter *ctc = getContractionTableCharacter(sar->bcd, character);

  if (ctc) {
    ContractionTableOffset offset = ctc->always;

    if (offset) {
      const ContractionTableRule *rule = getContractionTableItem(sar->bcd, offset);

      if (rule->replen) {
        sar->character->always = rule;
        return 1;
      }
    }
  }

  return 0;
}

/* Return the "always" rule recorded in the character's entry,
 * or NULL if there is no entry or no such rule.
 */
static const ContractionTableRule *
getAlwaysRule (BrailleContractionData *bcd, wchar_t character) {
  const CharacterEntry *entry = getCharacterEntry(bcd, character);
  return entry? entry->always: NULL;
}

/* Return the first character matched by the character's "always" rule,
 * or 0 if it has none.
 */
static wchar_t
getBestCharacter (BrailleContractionData *bcd, wchar_t character) {
  const ContractionTableRule *rule = getAlwaysRule(bcd, character);
  return rule? rule->findrep[0]: 0;
}

/* Test whether two characters resolve to the same best character.
 * A character that has no "always" rule never matches anything.
 */
static int
sameCharacters (BrailleContractionData *bcd, wchar_t character1, wchar_t character2) {
  wchar_t best1 = getBestCharacter(bcd, character1);
  return best1 && (best1 == getBestCharacter(bcd, character2));
}

/* Return the lowercase form recorded in the character's entry,
 * or the character itself if it has no entry.
 */
static wchar_t
toLowerCase (BrailleContractionData *bcd, wchar_t character) {
  const CharacterEntry *entry = getCharacterEntry(bcd, character);
  return entry? entry->lowercase: character;
}

/* Compare, ignoring case, the first bcd->current.length characters at
 * "source" with the match string of the current rule.
 * Returns 1 if they all match, 0 otherwise.
 */
static int
checkCurrentRule (BrailleContractionData *bcd, const wchar_t *source) {
  const wchar_t *character = bcd->current.rule->findrep;
  int count = bcd->current.length;

  while (count) {
    if (toLowerCase(bcd, *source) != toLowerCase(bcd, *character)) return 0;
    --count, ++source, ++character;
  }

  return 1;
}

/* Record the character preceding the current input position.
 * The start of the input is treated as if it were preceded by a space.
 */
static void
setBefore (BrailleContractionData *bcd) {
  bcd->current.before = (bcd->input.current == bcd->input.begin)? WC_C(' '): bcd->input.current[-1];
}

/* Record the character "length" positions past the current input position.
 * The end of the input is treated as if it were followed by a space.
 */
static void
setAfter (BrailleContractionData *bcd, int length) {
  bcd->current.after = (bcd->input.current + length < bcd->input.end)? bcd->input.current[length]: WC_C(' ');
}

/* Test whether only punctuation lies between the current input position and
 * either a preceding space or the start of the input.
 */
static int
isBeginning (BrailleContractionData *bcd) {
  const wchar_t *ptr = bcd->input.current;

  while (ptr > bcd->input.begin) {
    if (!testCharacter(bcd, *--ptr, CTC_Punctuation)) {
      if (!testCharacter(bcd, *ptr, CTC_Space)) return 0;
      break;
    }
  }

  return 1;
}

/* Test whether only punctuation lies between the end of the current match
 * and either a following space or the end of the input.
 */
static int
isEnding (BrailleContractionData *bcd) {
  const wchar_t *ptr = bcd->input.current + bcd->current.length;

  while (ptr < bcd->input.end) {
    if (!testCharacter(bcd, *ptr, CTC_Punctuation)) {
      if (!testCharacter(bcd, *ptr, CTC_Space)) return 0;
      break;
    }

    ptr += 1;
  }

  return 1;
}

/* Find the first rule that applies at the current input position.
 *
 * length - the number of input characters that may be matched. When it is 1
 *          the rule chain of the (lowercased) current character is used;
 *          otherwise the chain is chosen by hashing the first two
 *          (lowercased) characters.
 *
 * On success, 1 is returned and bcd->current (rule, opcode, length, after)
 * describes the selected rule. For the large sign opcodes the opcode may have
 * been downgraded to CTO_Always. If no rule applies then 0 is returned, in
 * which case bcd->current may hold the last rule that was considered.
 */
static int
selectRule (BrailleContractionData *bcd, int length) {
  int ruleOffset;
  int maximumLength;

  if (length < 1) return 0;

  if (length == 1) {
    const ContractionTableCharacter *ctc = getContractionTableCharacter(bcd, toLowerCase(bcd, *bcd->input.current));
    if (!ctc) return 0;

    ruleOffset = ctc->rules;
    maximumLength = 1;
  } else {
    wchar_t characters[2];
    characters[0] = toLowerCase(bcd, bcd->input.current[0]);
    characters[1] = toLowerCase(bcd, bcd->input.current[1]);

    ruleOffset = getContractionTableHeader(bcd)->rules[CTH(characters)];
    maximumLength = 0; /* not yet known - set by the first matching rule */
  }

  while (ruleOffset) {
    bcd->current.rule = getContractionTableItem(bcd, ruleOffset);
    bcd->current.opcode = bcd->current.rule->opcode;
    bcd->current.length = bcd->current.rule->findlen;

    if ((length == 1) ||
        ((bcd->current.length <= length) &&
         checkCurrentRule(bcd, bcd->input.current))) {
      setAfter(bcd, bcd->current.length);

      if (!maximumLength) {
        maximumLength = bcd->current.length;

        /* Shorten the maximum match length so that a match doesn't
         * extend across a change of capitalization.
         */
        if (prefs.capitalizationMode != CTB_CAP_NONE) {
          typedef enum {CS_Any, CS_Lower, CS_UpperSingle, CS_UpperMultiple} CapitalizationState;
#define STATE(c) (testCharacter(bcd, (c), CTC_UpperCase)? CS_UpperSingle: testCharacter(bcd, (c), CTC_LowerCase)? CS_Lower: CS_Any)

          CapitalizationState current = STATE(bcd->current.before);
          int i;

          for (i=0; i<bcd->current.length; i+=1) {
            wchar_t character = bcd->input.current[i];
            CapitalizationState next = STATE(character);

            if (i > 0) {
              if (((current == CS_Lower) && (next == CS_UpperSingle)) ||
                  ((current == CS_UpperMultiple) && (next == CS_Lower))) {
                maximumLength = i;
                break;
              }

              if ((prefs.capitalizationMode != CTB_CAP_SIGN) &&
                  (next == CS_UpperSingle)) {
                maximumLength = i;
                break;
              }
            }

            if ((prefs.capitalizationMode == CTB_CAP_SIGN) &&
                (current > CS_Lower) &&
                (next == CS_UpperSingle)) {
              current = CS_UpperMultiple;
            } else if (next != CS_Any) {
              current = next;
            } else if (current == CS_Any) {
              current = CS_Lower;
            }
          }

#undef STATE
        }
      }

      if ((bcd->current.length <= maximumLength) &&
          (!bcd->current.rule->after || testBefore(bcd, bcd->current.rule->after)) &&
          (!bcd->current.rule->before || testAfter(bcd, bcd->current.rule->before))) {
        switch (bcd->current.opcode) {
          case CTO_Always:
          case CTO_Repeatable:
          case CTO_Literal:
          case CTO_Replace:
            return 1;

          case CTO_LargeSign:
          case CTO_LastLargeSign:
            if (!isBeginning(bcd) || !isEnding(bcd)) bcd->current.opcode = CTO_Always;
            return 1;

          case CTO_WholeWord:
            if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                testAfter(bcd, CTC_Space|CTC_Punctuation))
              return 1;
            break;

          case CTO_Contraction:
            if ((bcd->input.current > bcd->input.begin) &&
                sameCharacters(bcd, bcd->input.current[-1], WC_C('\'')))
              break;

            if (isBeginning(bcd) && isEnding(bcd)) return 1;
            break;

          case CTO_LowWord:
            if (testBefore(bcd, CTC_Space) &&
                testAfter(bcd, CTC_Space) &&
                (bcd->previous.opcode != CTO_JoinedWord) &&
                ((bcd->output.current == bcd->output.begin) || !bcd->output.current[-1]))
              return 1;
            break;

          case CTO_JoinedWord:
            if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                !sameCharacters(bcd, bcd->current.before, WC_C('-')) &&
                (bcd->output.current + bcd->current.rule->replen < bcd->output.end)) {
              const wchar_t *end = bcd->input.current + bcd->current.length;
              const wchar_t *ptr = end;

              /* The match must be followed by at least one space and
               * then a letter, with the cursor not within those spaces.
               */
              while (ptr < bcd->input.end) {
                if (!testCharacter(bcd, *ptr, CTC_Space)) {
                  if (!testCharacter(bcd, *ptr, CTC_Letter)) break;
                  if (ptr == end) break;
                  return 1;
                }

                if (ptr++ == bcd->input.cursor) break;
              }
            }
            break;

          case CTO_SuffixableWord:
            if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                testAfter(bcd, CTC_Space|CTC_Letter|CTC_Punctuation))
              return 1;
            break;

          case CTO_PrefixableWord:
            if (testBefore(bcd, CTC_Space|CTC_Letter|CTC_Punctuation) &&
                testAfter(bcd, CTC_Space|CTC_Punctuation))
              return 1;
            break;

          case CTO_BegWord:
            if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                testAfter(bcd, CTC_Letter))
              return 1;
            break;

          case CTO_BegMidWord:
            if (testBefore(bcd, CTC_Letter|CTC_Space|CTC_Punctuation) &&
                testAfter(bcd, CTC_Letter))
              return 1;
            break;

          case CTO_MidWord:
            if (testBefore(bcd, CTC_Letter) && testAfter(bcd, CTC_Letter))
              return 1;
            break;

          case CTO_MidEndWord:
            if (testBefore(bcd, CTC_Letter) &&
                testAfter(bcd, CTC_Letter|CTC_Space|CTC_Punctuation))
              return 1;
            break;

          case CTO_EndWord:
            if (testBefore(bcd, CTC_Letter) &&
                testAfter(bcd, CTC_Space|CTC_Punctuation))
              return 1;
            break;

          case CTO_BegNum:
            if (testBefore(bcd, CTC_Space|CTC_Punctuation) &&
                testAfter(bcd, CTC_Digit))
              return 1;
            break;

          case CTO_MidNum:
            if (testBefore(bcd, CTC_Digit) && testAfter(bcd, CTC_Digit))
              return 1;
            break;

          case CTO_EndNum:
            if (testBefore(bcd, CTC_Digit) &&
                testAfter(bcd, CTC_Space|CTC_Punctuation))
              return 1;
            break;

          case CTO_PrePunc:
            if (testCurrent(bcd, CTC_Punctuation) && isBeginning(bcd) && !isEnding(bcd))
              return 1;
            break;

          case CTO_PostPunc:
            if (testCurrent(bcd, CTC_Punctuation) && !isBeginning(bcd) && isEnding(bcd))
              return 1;
            break;

          default:
            break;
        }
      }
    }

    ruleOffset = bcd->current.rule->next;
  }

  return 0;
}

/* Append "count" cells to the output.
 * Returns 0, without writing anything, if they wouldn't all fit.
 */
static int
putCells (BrailleContractionData *bcd, const BYTE *cells, int count) {
  if (bcd->output.current + count > bcd->output.end) return 0;
  bcd->output.current = mempcpy(bcd->output.current, cells, count);
  return 1;
}

/* Append a single cell to the output. Returns 0 if the output is full. */
static int
putCell (BrailleContractionData *bcd, BYTE byte) {
  return putCells(bcd, &byte, 1);
}

/* Append the replacement cells of a rule to the output.
 *
 * In the dot 7 capitalization mode, dot 7 is added to the first cell if
 * "character" is uppercase. Returns 0 if the output became full.
 */
static int
putReplace (BrailleContractionData *bcd, const ContractionTableRule *rule, wchar_t character) {
  /* the replacement immediately follows the match string */
  const BYTE *cells = (BYTE *)&rule->findrep[rule->findlen];
  int count = rule->replen;

  if ((prefs.capitalizationMode == CTB_CAP_DOT7) &&
      testCharacter(bcd, character, CTC_UpperCase)) {
    if (!putCell(bcd, *cells++ | BRL_DOT_7)) return 0;
    if (!(count -= 1)) return 1;
  }

  return putCells(bcd, cells, count);
}

/* Append the braille representation of one character to the output.
 *
 * The sources are tried in this order: the character's "always" rule, the
 * cell encoded within a braille pattern character, the text table, the
 * "always" rule of the replacement character, and finally a cell with all
 * eight dots raised. Returns 0 if the output became full.
 */
static int
putCharacter (BrailleContractionData *bcd, wchar_t character) {
  {
    const ContractionTableRule *rule = getAlwaysRule(bcd, character);
    if (rule) return putReplace(bcd, rule, character);
  }

  if (isBrailleCharacter(character)) {
    return putCell(bcd, (character & UNICODE_CELL_MASK));
  }

  if (textTable) {
    unsigned char dots = convertCharacterToDots(textTable, character);
    return putCell(bcd, dots);
  }

  {
    const wchar_t replacementCharacter = getReplacementCharacter();

    if (replacementCharacter != character) {
      const ContractionTableRule *rule = getAlwaysRule(bcd, replacementCharacter);
      if (rule) return putReplace(bcd, rule, replacementCharacter);
    }
  }

  return putCell(bcd, (BRL_DOT_1 | BRL_DOT_2 | BRL_DOT_3 | BRL_DOT_4 | BRL_DOT_5 | BRL_DOT_6 | BRL_DOT_7 | BRL_DOT_8));
}

/* Append a cell sequence that is stored within the table at "offset".
 * Its first byte is the number of cells that follow it.
 */
static int
putSequence (BrailleContractionData *bcd, ContractionTableOffset offset) {
  const BYTE *sequence = getContractionTableItem(bcd, offset);
  return putCells(bcd, sequence+1, *sequence);
}

/* Advance the input position past the current match, clearing the offset
 * of each character after the first.
 */
static void
clearRemainingOffsets (BrailleContractionData *bcd) {
  const wchar_t *next = bcd->input.current + bcd->current.length;
  while (++bcd->input.current < next) clearOffset(bcd);
}

/* Contract the input text into braille cells using the internal table.
 *
 * Input is consumed from bcd->input.current, and cells are written to
 * bcd->output.current, until the input is exhausted or the output is full.
 * If the output fills up first then both positions are moved back, when
 * possible, to the most recent word boundary so that the output doesn't end
 * in the middle of a word. Always returns 1.
 */
static int
contractText_native (BrailleContractionData *bcd) {
  /* the most recent word boundary (input and output positions) */
  const wchar_t *srcword = NULL;
  BYTE *destword = NULL;

  /* the most recent line break opportunity, even if after a joined word */
  const wchar_t *srcjoin = NULL;
  BYTE *destjoin = NULL;

  /* where the output was when the current iteration began */
  BYTE *destlast = NULL;

  /* when not NULL: the input position until which contraction is suppressed */
  const wchar_t *literal = NULL;

  unsigned char lineBreakOpportunities[getInputCount(bcd)];
  LineBreakOpportunitiesState lbo;

  prepareLineBreakOpportunitiesState(&lbo);
  bcd->previous.opcode = CTO_None;

  while (bcd->input.current < bcd->input.end) {
    int wasLiteral = bcd->input.current == literal;

    destlast = bcd->output.current;
    setOffset(bcd);
    setBefore(bcd);

    if (literal)
      if (bcd->input.current >= literal)
        if (testCurrent(bcd, CTC_Space) || testPrevious(bcd, CTC_Space))
          literal = NULL;

    if ((!literal && selectRule(bcd, getInputUnconsumed(bcd))) || selectRule(bcd, 1)) {
      if (!literal &&
          ((bcd->current.opcode == CTO_Literal) ||
           (prefs.expandCurrentWord &&
            (bcd->input.cursor >= bcd->input.current) &&
            (bcd->input.cursor < (bcd->input.current + bcd->current.length))))) {
        literal = bcd->input.current + bcd->current.length;

        /* go back and redo the whole word without contracting it */
        if (!testCurrent(bcd, CTC_Space)) {
          if (destjoin) {
            bcd->input.current = srcjoin;
            bcd->output.current = destjoin;
          } else {
            bcd->input.current = bcd->input.begin;
            bcd->output.current = bcd->output.begin;
          }
        }

        continue;
      }

      if (bcd->current.opcode == CTO_Replace) {
        /* the replacement is UTF-8 text which itself is then contracted */
        const ContractionTableRule *rule = bcd->current.rule;
        size_t size = rule->replen + 1;
        wchar_t characters[size];
        wchar_t *to = characters;
        const char *from = (const char *)&rule->findrep[rule->findlen];
        convertUtf8ToWchars(&from, &to, size);

        const wchar_t *inputBuffer = characters;
        int inputLength = to - characters;
        int outputLength = bcd->output.end - bcd->output.current;

        contractText(
          bcd->table,
          inputBuffer, &inputLength,
          bcd->output.current, &outputLength,
          NULL, CTB_NO_CURSOR
        );

        bcd->output.current += outputLength;
        clearRemainingOffsets(bcd);
        continue;
      }

      if (getContractionTableHeader(bcd)->numberSign &&
          (bcd->previous.opcode != CTO_MidNum) &&
          !testBefore(bcd, CTC_Digit) &&
          testCurrent(bcd, CTC_Digit)) {
        if (!putSequence(bcd, getContractionTableHeader(bcd)->numberSign)) break;
      } else if (getContractionTableHeader(bcd)->englishLetterSign &&
                 testCurrent(bcd, CTC_Letter)) {
        if ((bcd->current.opcode == CTO_Contraction) ||
            ((bcd->current.opcode != CTO_EndNum) && testBefore(bcd, CTC_Digit)) ||
            (testCurrent(bcd, CTC_Letter) &&
             (bcd->current.opcode == CTO_Always) &&
             (bcd->current.length == 1) &&
             testBefore(bcd, CTC_Space) &&
             (((bcd->input.current + 1) == bcd->input.end) ||
              testNext(bcd, CTC_Space) ||
              (testNext(bcd, CTC_Punctuation) &&
               !sameCharacters(bcd, bcd->input.current[1], WC_C('.')) &&
               !sameCharacters(bcd, bcd->input.current[1], WC_C('\'')))))) {
          if (!putSequence(bcd, getContractionTableHeader(bcd)->englishLetterSign)) break;
        }
      }

      if (prefs.capitalizationMode == CTB_CAP_SIGN) {
        if (testCurrent(bcd, CTC_UpperCase)) {
          if (!testBefore(bcd, CTC_UpperCase)) {
            if (getContractionTableHeader(bcd)->beginCapitalSign &&
                (bcd->input.current + 1 < bcd->input.end) &&
                testNext(bcd, CTC_UpperCase)) {
              if (!putSequence(bcd, getContractionTableHeader(bcd)->beginCapitalSign)) break;
            } else if (getContractionTableHeader(bcd)->capitalSign) {
              if (!putSequence(bcd, getContractionTableHeader(bcd)->capitalSign)) break;
            }
          }
        } else if (testCurrent(bcd, CTC_LowerCase)) {
          /* Compare distances rather than pointers so that no pointer
           * to before the start of the input is ever formed.
           */
          if (getContractionTableHeader(bcd)->endCapitalSign &&
              ((bcd->input.current - bcd->input.begin) >= 2) &&
              testPrevious(bcd, CTC_UpperCase) &&
              testRelative(bcd, -2, CTC_UpperCase)) {
            if (!putSequence(bcd, getContractionTableHeader(bcd)->endCapitalSign)) break;
          }
        }
      }

      switch (bcd->current.opcode) {
        case CTO_LargeSign:
        case CTO_LastLargeSign:
          if ((bcd->previous.opcode == CTO_LargeSign) && !wasLiteral) {
            /* remove the blank cells between consecutive large signs */
            while ((bcd->output.current > bcd->output.begin) && !bcd->output.current[-1]) bcd->output.current -= 1;
            setOffset(bcd);

            {
              /* don't leave any saved output position beyond the new end */
              BYTE **destptrs[] = {&destword, &destjoin, &destlast, NULL};
              BYTE ***destptr = destptrs;

              while (*destptr) {
                if (**destptr && (**destptr > bcd->output.current)) **destptr = bcd->output.current;
                destptr += 1;
              }
            }
          }
          break;

        default:
          break;
      }

      if (bcd->current.rule->replen &&
          !((bcd->current.opcode == CTO_Always) && (bcd->current.length == 1))) {
        if (!putReplace(bcd, bcd->current.rule, *bcd->input.current)) goto done;
        clearRemainingOffsets(bcd);
      } else {
        const wchar_t *srclim = bcd->input.current + bcd->current.length;

        while (1) {
          if (!putCharacter(bcd, *bcd->input.current)) goto done;
          if (++bcd->input.current == srclim) break;
          setOffset(bcd);
        }
      }

      {
        const wchar_t *srcorig = bcd->input.current;
        const wchar_t *srcbeg = NULL;
        BYTE *destbeg = NULL;

        switch (bcd->current.opcode) {
          case CTO_Repeatable: {
            const wchar_t *srclim = bcd->input.end - bcd->current.length;

            srcbeg = bcd->input.current - bcd->current.length;
            destbeg = destlast;

            /* skip over all immediately following repetitions */
            while ((bcd->input.current <= srclim) &&
                   checkCurrentRule(bcd, bcd->input.current)) {
              clearOffset(bcd);
              clearRemainingOffsets(bcd);
            }

            break;
          }

          case CTO_JoinedWord:
            srcbeg = bcd->input.current;
            destbeg = bcd->output.current;

            /* skip over the spaces that follow the joined word */
            while ((bcd->input.current < bcd->input.end) &&
                   testCurrent(bcd, CTC_Space)) {
              clearOffset(bcd);
              bcd->input.current += 1;
            }
            break;

          default:
            break;
        }

        /* If the cursor is within what was just skipped over
         * then redo it without skipping over anything.
         */
        if (srcbeg &&
            (bcd->input.cursor >= srcbeg) &&
            (bcd->input.cursor < bcd->input.current)) {
          int repeat = !literal;
          literal = bcd->input.current;

          if (repeat) {
            bcd->input.current = srcbeg;
            bcd->output.current = destbeg;
            continue;
          }

          bcd->input.current = srcorig;
        }
      }
    } else {
      bcd->current.opcode = CTO_Always;
      if (!putCharacter(bcd, *bcd->input.current)) break;
      bcd->input.current += 1;
    }

    /* Only look for a break opportunity while there's still input. Once
     * the last character has been consumed, the position to be checked
     * would be one past the end of both the input and the opportunities
     * array, and the saved positions are only ever used when input remains.
     */
    if (bcd->input.current < bcd->input.end) {
      findLineBreakOpportunities(bcd, &lbo, lineBreakOpportunities, bcd->input.begin, getInputConsumed(bcd));

      if (lineBreakOpportunities[getInputConsumed(bcd)]) {
        srcjoin = bcd->input.current;
        destjoin = bcd->output.current;

        if (bcd->current.opcode != CTO_JoinedWord) {
          srcword = bcd->input.current;
          destword = bcd->output.current;
        }
      }
    }

    if ((bcd->output.current == bcd->output.begin) || bcd->output.current[-1]) {
      bcd->previous.opcode = bcd->current.opcode;
    }
  }

done:
  if (bcd->input.current < bcd->input.end) {
    /* The output is full - go back to the most recent word boundary,
     * or else discard the partial output of the last iteration.
     */
    if (destword && (destword > bcd->output.begin) &&
        (!(testPrevious(bcd, CTC_Space) || testCurrent(bcd, CTC_Space)) ||
         (bcd->previous.opcode == CTO_JoinedWord))) {
      bcd->input.current = srcword;
      bcd->output.current = destword;
    } else if (destlast) {
      bcd->output.current = destlast;
    }
  }

  return 1;
}

/* Complete a newly created character entry: merge in the attributes that
 * the table defines for the character, and determine its "always" rule
 * (NULL if one can't be found).
 */
static void
finishCharacterEntry_native (BrailleContractionData *bcd, CharacterEntry *entry) {
  wchar_t character = entry->value;

  {
    const ContractionTableCharacter *ctc = getContractionTableCharacter(bcd, character);
    if (ctc) entry->attributes |= ctc->attributes;
  }

  {
    SetAlwaysRuleData sar = {
      .bcd = bcd,
      .character = entry
    };

    /* the replacement character itself is looked up directly */
    int ok = (character == getReplacementCharacter())?
             setAlwaysRule(character, &sar):
             handleBestCharacter(character, setAlwaysRule, &sar);

    if (!ok) entry->always = NULL;
  }
}

/* The translation methods implemented by this file. */
static const ContractionTableTranslationMethods nativeTranslationMethods = {
  .contractText = contractText_native,
  .finishCharacterEntry = finishCharacterEntry_native
};

/* Return the translation methods for internal (native) contraction tables. */
const ContractionTableTranslationMethods *
getContractionTableTranslationMethods_native (void) {
  return &nativeTranslationMethods;
}