/*************************************************************************/ /* */ /* Copyright (c) 1997-98 Richard Tobin, Language Technology Group, HCRC, */ /* University of Edinburgh. */ /* */ /* THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, */ /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ /* IN NO EVENT SHALL THE AUTHOR OR THE UNIVERSITY OF EDINBURGH BE LIABLE */ /* FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF */ /* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION */ /* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* */ /*************************************************************************/ /* $Id: xmlparser.c,v 1.3 2004/05/04 00:00:17 awb Exp $ */ #ifndef lint static char vcid[] = "$Id: xmlparser.c,v 1.3 2004/05/04 00:00:17 awb Exp $"; #endif /* lint */ /* * XML (and nSGML) parser. * Author: Richard Tobin. 
*/ #include #include #ifdef FOR_LT #include "lt-memory.h" #include "nsllib.h" #define Malloc salloc #define Realloc srealloc #define Free sfree #else #include "system.h" #endif #include "charset.h" #include "string16.h" #include "ctype16.h" #include "dtd.h" #include "input.h" #include "stdio16.h" #include "xmlparser.h" static int transcribe(Parser p, int back, int count); static void pop_while_at_eoe(Parser p); static void maybe_uppercase(Parser p, Char *s); static void maybe_uppercase_name(Parser p); static int str_maybecase_cmp8(Parser p, const char8 *a, const char8 *b); static int is_ascii_alpha(int c); static int is_ascii_digit(int c); static int parse_external_id(Parser p, int required, char8 **publicid, char8 **systemid, int preq, int sreq); static int parse_conditional(Parser p); static int parse_notation_decl(Parser p); static int parse_entity_decl(Parser p, Entity ent, int line, int chpos); static int parse_attlist_decl(Parser p); static int parse_element_decl(Parser p); static ContentParticle parse_cp(Parser p); static ContentParticle parse_choice_or_seq(Parser p); static ContentParticle parse_choice_or_seq_1(Parser p, int nchildren,char sep); static int check_content_decl(Parser p, ContentParticle cp); static int check_content_decl_1(Parser p, ContentParticle cp); static Char *stringify_cp(ContentParticle cp); static void print_cp(ContentParticle cp, FILE16 *f); static int size_cp(ContentParticle cp); void FreeContentParticle(ContentParticle cp); static int parse_reference(Parser p, int pe, int expand, int allow_external); static int parse_character_reference(Parser p, int expand); static const char8 *escape(int c); static int parse_name(Parser p, const char8 *where); static int parse_nmtoken(Parser p, const char8 *where); static int looking_at(Parser p, const char8 *string); static void clear_xbit(XBit xbit); static int expect(Parser p, int expected, const char8 *where); static int expect_dtd_whitespace(Parser p, const char8 *where); static void 
skip_whitespace(InputSource s);
static int skip_dtd_whitespace(Parser p, int allow_pe);
static int parse_cdata(Parser p);
static int process_nsl_decl(Parser p);
static int process_xml_decl(Parser p);
static int parse_dtd(Parser p);
static int read_markupdecls(Parser p);
static int error(Parser p, const char8 *format, ...);
static void warn(Parser p, const char8 *format, ...);
static void verror(XBit bit, const char8 *format, va_list args);

/* Kind of literal being read; passed as the last argument of parse_string(). */
enum literal_type {LT_cdata_attr, LT_tok_attr, LT_plain, LT_entity};

static int parse_string(Parser p, const char8 *where, enum literal_type type);
static int parse_pi(Parser p);
static int parse_comment(Parser p, int skip);
static int parse_pcdata(Parser p);
static int parse_starttag(Parser p);
static int parse_attribute(Parser p);
static int parse_endtag(Parser p);
static int parse_markup(Parser p);
static int parse(Parser p);
static int parse_markupdecl(Parser p);

/* Evaluate x; if the result is negative, return -1 from the enclosing function. */
#define require(x) if(x >= 0) {} else return -1
/* As require(), but the enclosing function returns 0 on failure. */
#define require0(x) if(x >= 0) {} else return 0

/* Null a buffer pointer and zero its companion size field (named buf followed
   by "size").  In the code visible here it is used right after the old
   pointer has been handed to a new owner. */
#define Consume(buf) (buf = 0, buf##size = 0)

/* Make sure buf has room for at least sz+1 Chars, growing it with Realloc when
   the recorded size is too small.  On allocation failure the enclosing
   function returns error(p, "System error"), so a Parser named p must be in
   scope at the point of use. */
#define ExpandBuf(buf, sz) \
    if(buf##size >= (sz)+1) {} else if((buf = Realloc(buf, (buf##size = sz + 1) * sizeof(Char)))) {} else return error(p, "System error")

/* Copy the name most recently recorded in p->name / p->namelen into freshly
   allocated, zero-terminated storage assigned to n.  On allocation failure
   the enclosing function returns error(p, "System error").  The expansion
   already ends in a semicolon. */
#define CopyName(n) if((n = Malloc((p->namelen + 1)*sizeof(Char)))) {memcpy(n, p->name, p->namelen * sizeof(Char)); n[p->namelen] = 0;} else return error(p, "System error");

/* As CopyName(), but the enclosing function returns 0 on failure. */
#define CopyName0(n) if((n = Malloc((p->namelen + 1)*sizeof(Char)))) {memcpy(n, p->name, p->namelen * sizeof(Char)); n[p->namelen] = 0;} else {error(p, "System error"); return 0;}

/* Printable names for XBit types; presumably indexed by the XBit type
   enumeration (the array has XBIT_enum_count entries) - TODO confirm the
   order against the enum declaration. */
const char8 *XBitTypeName[XBIT_enum_count] = {
    "dtd",
    "start",
    "empty",
    "end",
    "eof",
    "pcdata",
    "pi",
    "comment",
    "cdsect",
    "xml",
    "error",
    "warning",
    "none"
};

/* Passed as the third argument of NewInternalEntity() in ParserInit().
   NOTE(review): nothing visible in this file assigns it, so it is null there. */
static Entity xml_builtin_entity;
/* Head of the chain of predefined entities built by ParserInit(). */
static Entity xml_predefined_entities;

/*
 * One-time initialisation of the parser module.  Initialises the charset,
 * ctype16 and stdio16 modules and builds the chain of the five predefined
 * entities (lt, gt, amp, apos, quot), leaving its head in
 * xml_predefined_entities.
 * Returns 0 on success or when already initialised, -1 if an entity could
 * not be created.
 * NOTE(review): initialised is set before the entities are built, so after a
 * failure a second call returns 0 without building the chain.
 */
int ParserInit(void)
{
    static int initialised = 0;
    Entity e, f;
    int i;
    static const Char lt[] = {'l','t',0}, ltval[] = {'&','#','6','0',';',0};
    static const Char gt[] = {'g','t',0}, gtval[] = {'&','#','6','2',';',0};
    static const Char amp[] = {'a','m','p',0}, ampval[] = {'&','#','3','8',';',0};
    static const Char apos[] = {'a','p','o','s',0}, aposval[] = {'\'',0};
    static const Char quot[] = {'q','u','o','t',0}, quotval[] = {'"',0};
    static const Char *builtins[5][2] = {
	{lt, ltval}, {gt, gtval}, {amp, ampval}, {apos, aposval}, {quot, quotval}
    };

    (void)vcid;			/* presumably keeps vcid from being reported as unused */

    if(initialised)
	return 0;
    initialised = 1;

    init_charset();
    init_ctype16();
    init_stdio16();

    /* Each new entity is linked in front of the previous one, so the chain
       runs in the reverse order of the builtins table. */
    for(i=0, f=0; i<5; i++, f=e)
    {
	e = NewInternalEntity(builtins[i][0], builtins[i][1], xml_builtin_entity, 0, 0, 0);
	if(!e)
	    return -1;
	e->next = f;
    }

    xml_predefined_entities = e;

    return 0;
}

/* Read and discard XML whitespace from s.  The first character that is not
   whitespace (or the XEOE marker) is pushed back with unget. */
static void skip_whitespace(InputSource s)
{
    int c;

    while((c = get(s)) != XEOE && is_xml_whitespace(c))
	;
    unget(s);
}

/*
 * Skip whitespace and (optionally) the start and end of PEs.  Return 1 if
 * there actually *was* some whitespace or a PE start/end, -1 if
 * an error occurred, 0 otherwise.
 *
 * NOTE(review): the text of this function is damaged in this copy.  The
 * comment that begins "this complication is needed for" is never closed, so
 * the code handling a '%' character is missing and, as the text stands, the
 * comment runs on through the opening lines of expect() as far as the
 * "For error position" comment.  The damaged stretch is kept exactly as found
 * on one line below; restore it from a pristine copy before building.
 */
static int skip_dtd_whitespace(Parser p, int allow_pe)
{
    int c;
    int got_some = 0;
    InputSource s = p->source;

    while(1)
    {
	c = get(s);

	if(c == XEOE)
	{
	    got_some = 1;
	    if(s->parent)
	    {
		if(!allow_pe)
		    return error(p, "PE end not allowed here in internal subset");
		if(s->entity->type == ET_external)
		    p->external_pe_depth--;
		ParserPop(p);
		s = p->source;
	    }
	    else
	    {
		unget(s);	/* leave the final EOE waiting to be read */
		return got_some;
	    }
	}
	else if(is_xml_whitespace(c))
	{
	    got_some = 1;
	}
	else if(c == '%')
	{
/* this complication is needed for source; if(s->entity->type == ET_external) p->external_pe_depth++; got_some = 1; } else { unget(s); return got_some; } } else { unget(s); return got_some; } } } static int expect(Parser p, int expected, const char8 *where) { int c; InputSource s = p->source; c = get(s); if(c != expected) { unget(s); /* For error position */
	return error(p, "Expected %s %s, but got %s", escape(expected), where, escape(c));
    }

    return 0;
}

/*
 * Expects whitespace or the start or end of a PE.
*/
static int expect_dtd_whitespace(Parser p, const char8 *where)
{
    /* PE boundaries are only tolerated while inside an external PE. */
    int found = skip_dtd_whitespace(p, p->external_pe_depth > 0);

    if(found < 0)
	return -1;
    if(found != 0)
	return 0;

    return error(p, "Expected whitespace %s", where);
}

/* Reset a bit to the empty state without freeing anything it points to. */
static void clear_xbit(XBit xbit)
{
    xbit->element_definition = 0;
    xbit->attributes = 0;
    xbit->S2 = 0;
    xbit->S1 = xbit->S2;
    xbit->s2 = 0;
    xbit->s1 = xbit->s2;
    xbit->type = XBIT_none;
}

/* Release the strings and attribute list held by a bit, then reset it.
   The bit structure itself is not freed. */
void FreeXBit(XBit xbit)
{
    Attribute attr, following;
    int is_message_bit;

    if(xbit->S1)
	Free(xbit->S1);
    if(xbit->S2)
	Free(xbit->S2);

    /* s1 is left alone for error and warning bits (presumably it is not
       heap-owned there). */
    is_message_bit = (xbit->type == XBIT_error || xbit->type == XBIT_warning);
    if(!is_message_bit && xbit->s1)
	Free(xbit->s1);
    if(xbit->s2)
	Free(xbit->s2);

    attr = xbit->attributes;
    while(attr)
    {
	following = attr->next;
	if(attr->value)
	    Free(attr->value);
	Free(attr);
	attr = following;
    }

    clear_xbit(xbit);
}

/*
 * Returns 1 if the input matches string (and consumes the input).
 * Otherwise returns 0 and leaves the input stream where it was.
 * Case-sensitivity depends on the CaseInsensitive flag.
 * A space character at the end of string matches any (non-zero) amount of
 * whitespace; spaces are treated literally elsewhere.
 * Never reads beyond an end-of-line, except to consume
 * extra whitespace when the last character of string is a space.
 * Never reads beyond end-of-entity.
*/ static int looking_at(Parser p, const char8 *string) { InputSource s = p->source; int c, d; int save = s->next; for(c = *string++; c; c = *string++) { if(at_eol(s)) goto fail; /* We would go over a line end */ d = get(s); if(c == ' ' && *string == 0) { if(d == XEOE || !is_xml_whitespace(d)) goto fail; skip_whitespace(s); } else if((ParserGetFlag(p, CaseInsensitive) && Toupper(d) != Toupper(c)) || (!ParserGetFlag(p, CaseInsensitive) && d != c)) goto fail; } return 1; fail: s->next = save; return 0; } static int parse_name(Parser p, const char8 *where) { InputSource s = p->source; int c, i; c = get(s); if(c == XEOE || !is_xml_namestart(c)) { unget(s); /* For error position */ error(p, "Expected name, but got %s %s", escape(c), where); return -1; } i = 1; while(c = get(s), (c != XEOE && is_xml_namechar(c))) i++; unget(s); p->name = s->line + s->next - i; p->namelen = i; return 0; } static int parse_nmtoken(Parser p, const char8 *where) { InputSource s = p->source; int c, i=0; while(c = get(s), (c !=XEOE && is_xml_namechar(c))) i++; unget(s); if(i == 0) return error(p, "Expected nmtoken value, but got %s %s", escape(c), where); p->name = s->line + s->next - i; p->namelen = i; return 0; } /* Escape a character for printing n an error message. NB returns 5 static storage buffers in rotation. 
*/ static const char8 *escape(int c) { static char8 buf[5][15]; static int bufnum=-1; #if CHAR_SIZE == 8 if(c != XEOE) c &= 0xff; #endif bufnum = (bufnum + 1) % 5; if(c == XEOE) return ""; else if(c >= 33 && c <= 126) sprintf(buf[bufnum], "%c", c); else if(c == ' ') sprintf(buf[bufnum], ""); else sprintf(buf[bufnum], "<0x%x>", c); return buf[bufnum]; } Parser NewParser(void) { Parser p; if(ParserInit() == -1) return 0; p = Malloc(sizeof(*p)); if(!p) return 0; p->state = PS_prolog1; p->document_entity = 0; /* Set at first ParserPush */ p->have_dtd = 0; p->standalone = SDD_unspecified; p->flags = 0; p->source = 0; clear_xbit(&p->xbit); #ifndef FOR_LT p->xbit.nchildren = 0; /* These three should never be changed */ p->xbit.children = 0; p->xbit.parent = 0; #endif p->pbufsize = p->pbufnext = 0; p->pbuf = 0; p->peeked = 0; p->dtd = NewDtd(); p->dtd_callback = p->warning_callback = 0; p->entity_opener = 0; p->callback_arg = 0; p->external_pe_depth = 0; p->element_stack = 0; p->element_stack_alloc = 0; p->element_depth = 0; ParserSetFlag(p, XMLPiEnd, 1); ParserSetFlag(p, XMLEmptyTagEnd, 1); ParserSetFlag(p, XMLPredefinedEntities, 1); ParserSetFlag(p, XMLExternalIDs, 1); ParserSetFlag(p, XMLMiscWFErrors, 1); ParserSetFlag(p, ErrorOnUnquotedAttributeValues, 1); ParserSetFlag(p, XMLLessThan, 1); ParserSetFlag(p, IgnoreEntities, 0); ParserSetFlag(p, ExpandGeneralEntities, 1); ParserSetFlag(p, ExpandCharacterEntities, 1); ParserSetFlag(p, NormaliseAttributeValues, 1); ParserSetFlag(p, WarnOnUndefinedElements, 1); ParserSetFlag(p, WarnOnUndefinedAttributes, 1); ParserSetFlag(p, WarnOnRedefinitions, 1); ParserSetFlag(p, TrustSDD, 1); ParserSetFlag(p, ReturnComments, 1); ParserSetFlag(p, CheckEndTagsMatch, 1); return p; } void FreeParser(Parser p) { while (p->source) ParserPop(p); /* Will close file */ Free(p->pbuf); Free(p->element_stack); Free(p); } InputSource ParserRootSource(Parser p) { InputSource s; for(s=p->source; s && s->parent; s = s->parent) ; return s; } Entity 
ParserRootEntity(Parser p) { return ParserRootSource(p)->entity; } void ParserSetCallbackArg(Parser p, void *arg) { p->callback_arg = arg; } void ParserSetDtdCallback(Parser p, CallbackProc cb) { p->dtd_callback = cb; } void ParserSetWarningCallback(Parser p, CallbackProc cb) { p->warning_callback = cb; } void ParserSetEntityOpener(Parser p, EntityOpenerProc opener) { p->entity_opener = opener; } #ifndef FOR_LT XBit ReadXTree(Parser p) { XBit bit, tree, child; XBit *children; bit = ReadXBit(p); switch(bit->type) { case XBIT_error: return bit; case XBIT_start: if(!(tree = Malloc(sizeof(*tree)))) { error(p, "System error"); return &p->xbit; } *tree = *bit; while(1) { child = ReadXTree(p); switch(child->type) { case XBIT_error: FreeXTree(tree); return child; case XBIT_eof: FreeXTree(tree); { error(p, "EOF in element"); return &p->xbit; } case XBIT_end: if(child->element_definition != tree->element_definition) { const Char *name1 = tree->element_definition->name, *name2 = child->element_definition->name; FreeXTree(tree); FreeXTree(child); error(p, "Mismatched end tag: expected , got ", name1, name2); return &p->xbit; } FreeXTree(child); return tree; default: children = Realloc(tree->children, (tree->nchildren + 1) * sizeof(XBit)); if(!children) { FreeXTree(tree); FreeXTree(child); error(p, "System error"); return &p->xbit; } child->parent = tree; children[tree->nchildren] = child; tree->nchildren++; tree->children = children; break; } } default: if(!(tree = Malloc(sizeof(*tree)))) { error(p, "System error"); return &p->xbit; } *tree = *bit; return tree; } } void FreeXTree(XBit tree) { int i; for(i=0; inchildren; i++) FreeXTree(tree->children[i]); Free(tree->children); FreeXBit(tree); if(tree->type == XBIT_error) /* error "trees" are always in the Parser structure, not malloced */ return; Free(tree); } #endif /* (not) FOR_LT */ XBit ReadXBit(Parser p) { if(p->peeked) p->peeked = 0; else parse(p); return &p->xbit; } XBit PeekXBit(Parser p) { if(p->peeked) error(p, 
"Attempt to peek twice"); else { parse(p); p->peeked = 1; } return &p->xbit; } int ParserPush(Parser p, InputSource source) { if(!p->source && !p->document_entity) p->document_entity = source->entity; source->parent = p->source; p->source = source; if(source->entity->type == ET_internal) return 0; /* Look at first few bytes of external entities to guess encoding, then look for an XMLDecl or TextDecl. */ if(source->entity->encoding == CE_unknown) /* we might already know */ determine_character_encoding(source); #if CHAR_SIZE == 8 if(!EncodingIsAsciiSuperset(source->entity->encoding)) return error(p, "Unsupported character encoding %s", CharacterEncodingName[source->entity->encoding]); #else if(source->entity->encoding == CE_unknown) return error(p, "Unknown character encoding"); #endif get(source); unget(source); /* To get the first line read */ source->entity->ml_decl = ML_unspecified; if(looking_at(p, "entity == p->document_entity && !source->entity->version_decl) return error(p, "XML declaration in document entity lacked " "version number"); if(source->entity != p->document_entity && source->entity->standalone_decl != SDD_unspecified) return error(p, "Standalone attribute not allowed except in " "document entity"); return 0; } else if(!ParserGetFlag(p, XMLStrictWFErrors) && looking_at(p, "source; Fclose(source->file16); p->source = source->parent; if(source->entity->type == ET_external) Free(source->line); Free(source); } /* Returns true if the source is at EOE. If so, the EOE will have been read. */ static int at_eoe(InputSource s) { if(!at_eol(s)) return 0; if(s->seen_eoe || get_with_fill(s) == XEOE) return 1; unget(s); return 0; } /* Pops any sources that are at EOE. Leaves source buffer with at least one character in it (except at EOF, where it leaves the EOE unread). 
*/ static void pop_while_at_eoe(Parser p) { while(1) { InputSource s = p->source; if(!at_eoe(s)) return; if(!s->parent) { unget(s); return; } ParserPop(p); } } void ParserSetFlag(Parser p, ParserFlag flag, int value) { if(value) p->flags |= (1 << flag); else p->flags &= ~(1 << flag); if(flag == XMLPredefinedEntities) { if(value) p->dtd->predefined_entities = xml_predefined_entities; else p->dtd->predefined_entities = 0; } } void ParserPerror(Parser p, XBit bit) { int linenum, charnum; InputSource s; Fprintf(Stderr, "%s: %s\n", bit->type == XBIT_error ? "Error" : "Warning", bit->error_message); for(s=p->source; s; s=s->parent) { if(s->entity->name) Fprintf(Stderr, " in entity \"%S\"", s->entity->name); else Fprintf(Stderr, " in unnamed entity"); switch(SourceLineAndChar(s, &linenum, &charnum)) { case 1: Fprintf(Stderr, " at line %d char %d of", linenum+1, charnum+1); break; case 0: Fprintf(Stderr, " defined at line %d char %d of", linenum+1, charnum+1); break; case -1: Fprintf(Stderr, " defined in"); break; } Fprintf(Stderr, " %s\n", EntityDescription(s->entity)); } } static int parse(Parser p) { int c; InputSource s; if(p->state == PS_end || p->state == PS_error) { /* After an error or EOF, just keep returning EOF */ p->xbit.type = XBIT_eof; return 0; } clear_xbit(&p->xbit); if(p->state <= PS_prolog2 || p->state == PS_epilog) skip_whitespace(p->source); restart: pop_while_at_eoe(p); s = p->source; SourcePosition(s, &p->xbit.entity, &p->xbit.byte_offset); switch(c = get(s)) { case XEOE: if(p->state != PS_epilog) return error(p, "Document ends too soon"); p->state = PS_end; p->xbit.type = XBIT_eof; return 0; case '<': return parse_markup(p); case '&': if(ParserGetFlag(p, IgnoreEntities)) goto pcdata; if(p->state <= PS_prolog2) return error(p, "Entity reference not allowed in prolog"); if(looking_at(p, "#")) { /* a character reference - go back and parse as pcdata */ unget(s); goto pcdata; } if(ParserGetFlag(p, ExpandGeneralEntities)) { /* an entity reference - push 
it and start again */
	    require(parse_reference(p, 0, 1, 1));
	    goto restart;
	}
	/* not expanding general entities, so treat as pcdata */
	goto pcdata;

    default:
    pcdata:
	/* Put the character back; parse_pcdata expects to re-read it */
	unget(s);
	return parse_pcdata(p);
    }
}

/* Called after reading '<' */
/* Dispatches on the character that follows: after '!' it recognises a
   comment ("--"), a DOCTYPE declaration and a CDATA section.
   NOTE(review): the text is damaged in this copy from the "Syntax error
   after" message onwards.  The rest of this function and the opening of the
   function that follows are missing; judging by the prototypes and the
   messages, what remains is the tail of parse_endtag.  The damaged stretch
   is kept exactly as found on one line below.  In the surviving tail, the
   messages "End tag outside of any element" and "Mismatched end tag" are
   passed arguments but contain no conversions, so they have presumably lost
   text too.  Restore from a pristine copy before building. */
static int parse_markup(Parser p)
{
    InputSource s = p->source;
    int c = get(s);

    switch(c)
    {
    case '!':
	if(looking_at(p, "--"))
	{
	    if(ParserGetFlag(p, ReturnComments))
		return parse_comment(p, 0);
	    else
	    {
		/* Skip the comment and return whatever follows it */
		require(parse_comment(p, 1));
		return parse(p);
	    }
	}
	else if(looking_at(p, "DOCTYPE "))
	    return parse_dtd(p);
	else if(looking_at(p, "[CDATA["))
	    return parse_cdata(p);
	else
	    return error(p, "Syntax error after xbit.type = XBIT_end; require(parse_name(p, "after element_depth <= 0) return error(p, "End tag outside of any element", p->namelen, p->name);

	/* Pop the innermost open element and compare its name with the name
	   just read */
	ent = p->element_stack[--p->element_depth].entity;
	def = p->element_stack[p->element_depth].definition;

	if(p->namelen == def->namelen &&
	   memcmp(p->name, def->name, p->namelen * sizeof(Char)) == 0)
	    p->xbit.element_definition = def;
	else
	    return error(p, "Mismatched end tag: expected , got ",
			 def->name, p->namelen, p->name);

	if(ent != p->source->entity)
	    return error(p, "Element ends in different entity from that "
			 "in which it starts");

	/* Closing the outermost element ends the document body */
	if(p->element_depth == 0)
	    p->state = PS_epilog;
    }
    else
    {
	p->xbit.element_definition = FindElementN(p->dtd, p->name, p->namelen);
	if(!p->xbit.element_definition)
	    return error(p, "End tag for unknown element %.*S",
			 p->namelen, p->name);
    }

    skip_whitespace(p->source);
    return expect(p, '>', "after name in end tag");
}

/* Parse a start tag or empty-element tag: the element name, then any
   attributes, up to and including the closing '>' or "/>". */
static int parse_starttag(Parser p)
{
    int c;

    if(p->state == PS_epilog && !ParserGetFlag(p, AllowMultipleElements))
	return error(p, "Document contains multiple elements");

    p->state = PS_body;

    require(parse_name(p, "after <"));
    maybe_uppercase_name(p);

    p->xbit.element_definition = FindElementN(p->dtd, p->name, p->namelen);
    /* An unknown or only tentatively defined element is reported (when a DTD
       is present and the flags ask for it) and then declared below */
    if(!p->xbit.element_definition || p->xbit.element_definition->tentative)
    {
	if(p->have_dtd && ParserGetFlag(p, ErrorOnUndefinedElements))
	    return
error(p, "Start tag for undeclared element %.*S", p->namelen, p->name); if(p->have_dtd && ParserGetFlag(p, WarnOnUndefinedElements)) warn(p, "Start tag for undeclared element %.*S; " "declaring it to have content ANY", p->namelen, p->name); if(p->xbit.element_definition) RedefineElement(p->xbit.element_definition, CT_any, 0); else { if(!(p->xbit.element_definition = DefineElementN(p->dtd, p->name, p->namelen, CT_any, 0))) return error(p, "System error"); } } while(1) { InputSource s = p->source; /* We could just do skip_whitespace here, but we will get a better error message if we look a bit closer. */ c = get(s); if(c !=XEOE && is_xml_whitespace(c)) { skip_whitespace(s); c = get(s); } else if(c != '>' && !(ParserGetFlag(p, XMLEmptyTagEnd) && c == '/')) { unget(s); /* For error position */ return error(p, "Expected whitespace or tag end in start tag"); } if(c == '>') { p->xbit.type = XBIT_start; break; } if((ParserGetFlag(p, XMLEmptyTagEnd)) && c == '/') { require(expect(p, '>', "after / in start tag")); p->xbit.type = XBIT_empty; break; } unget(s); require(parse_attribute(p)); } if(ParserGetFlag(p, CheckEndTagsMatch)) { if(p->xbit.type == XBIT_start) { if(p->element_depth == p->element_stack_alloc) { p->element_stack_alloc = p->element_stack_alloc == 0 ? 
20 : p->element_stack_alloc * 2; if(!(p->element_stack = Realloc(p->element_stack, (p->element_stack_alloc * sizeof(*p->element_stack))))) return error(p, "System error"); } p->element_stack[p->element_depth].definition = p->xbit.element_definition; p->element_stack[p->element_depth++].entity = p->source->entity; } else if(p->element_depth == 0) p->state = PS_epilog; } if(ParserGetFlag(p, ReturnDefaultedAttributes)) { AttributeDefinition d; Attribute a; for(d=NextAttributeDefinition(p->xbit.element_definition, 0); d; d=NextAttributeDefinition(p->xbit.element_definition, d)) { if(!d->default_value) continue; for(a=p->xbit.attributes; a; a=a->next) if(a->definition == d) break; if(!a) { if(!(a = Malloc(sizeof(*a)))) return error(p, "System error"); a->definition = d; if(!(a->value = Strdup(d->default_value))) return error(p, "System error"); a->quoted = 1; a->next = p->xbit.attributes; p->xbit.attributes = a; } } } return 0; } static int parse_attribute(Parser p) { InputSource s = p->source; AttributeDefinition def; struct attribute *a; int c; require(parse_name(p, "for attribute")); maybe_uppercase_name(p); def = FindAttributeN(p->xbit.element_definition, p->name, p->namelen); if(!def) { if(p->have_dtd && ParserGetFlag(p, ErrorOnUndefinedAttributes)) return error(p, "Undeclared attribute %.*S for element %S", p->namelen, p->name, p->xbit.element_definition->name); if(p->have_dtd && ParserGetFlag(p, WarnOnUndefinedAttributes)) warn(p, "Undeclared attribute %.*S for element %S; " "declaring it as CDATA #IMPLIED", p->namelen, p->name, p->xbit.element_definition->name); if(!(def = DefineAttributeN(p->xbit.element_definition, p->name, p->namelen, AT_cdata, 0, DT_implied, 0))) return error(p, "System error"); } for(a = p->xbit.attributes; a; a = a->next) if(a->definition == def) return error(p, "Repeated attribute %.*S", p->namelen, p->name); if(!(a = Malloc(sizeof(*a)))) return error(p, "System error"); a->value = 0; /* in case of error */ a->next = p->xbit.attributes; 
    p->xbit.attributes = a;
    a->definition = def;

    skip_whitespace(s);
    require(expect(p, '=', "after attribute name"));

    skip_whitespace(s);
    /* Look at the next character without consuming it */
    c = get(s);
    unget(s);
    switch(c)
    {
    case '"':
    case '\'':
	a->quoted = 1;
	require(parse_string(p, "in attribute value",
			     a->definition->type == AT_cdata ?
			     LT_cdata_attr : LT_tok_attr));
	/* The attribute takes over the parser buffer holding the value */
	a->value = p->pbuf;
	Consume(p->pbuf);
	break;

    default:
	if(ParserGetFlag(p, ErrorOnUnquotedAttributeValues))
	    return error(p, "Value of attribute is unquoted");
	a->quoted = 0;
	require(parse_nmtoken(p, "in unquoted attribute value"));
	/* CopyName supplies its own terminating semicolon */
	CopyName(a->value);
	break;
    }

    return 0;
}

/* Append count Chars from the current source line to p->pbuf, growing the
   buffer if needed.  The copy starts back Chars before the source read
   position.  Returns 0, or the result of error() if the buffer cannot be
   grown. */
static int transcribe(Parser p, int back, int count)
{
    ExpandBuf(p->pbuf, p->pbufnext + count);
    memcpy(p->pbuf + p->pbufnext,
	   p->source->line + p->source->next - back,
	   count * sizeof(Char));
    p->pbufnext += count;
    return 0;
}

/* Called after pushing back the first character of the pcdata */
/* Collects character data into p->pbuf and returns it as an XBIT_pcdata bit.
   The scan works directly on the source line buffer: next is the scan
   position and count is the number of plain characters passed over but not
   yet copied.  Before anything else is done with the source, s->next is
   updated and the pending run is copied with transcribe(); its back argument
   is the distance from s->next to the start of that run.
   NOTE(review): at the done label p->pbuf is written without a null check;
   confirm that every path reaching it has first grown the buffer. */
static int parse_pcdata(Parser p)
{
    int count = 0;
    InputSource s;
    Char *buf;
    int next, buflen;

    if(p->state <= PS_prolog2)
	return error(p, "Character data not allowed in prolog");
    if(p->state == PS_epilog)
	return error(p, "Character data not allowed after body");

    s = p->source;
    buf = s->line;
    next = s->next;
    buflen = s->line_length;

    p->pbufnext = 0;

    while(1)
    {
	if(next == buflen)
	{
	    /* End of the buffered line: flush the pending run, then move on
	       to more input or stop */
	    s->next = next;
	    if(count > 0)
	    {
		require(transcribe(p, count, count));
	    }
	    count = 0;
	    if(at_eoe(s))
	    {
		if(!ParserGetFlag(p, MergePCData))
		    goto done;
		else
		    pop_while_at_eoe(p);
	    }
	    /* The current source may have changed; reload the cached state */
	    s = p->source;
	    buf = s->line;
	    next = s->next;
	    buflen = s->line_length;
	    if(next == buflen)
		goto done;	/* must be EOF */
	}

	switch(buf[next++])
	{
	case '<':
	    if(!ParserGetFlag(p, XMLLessThan))
	    {
		/* In nSGML, don't recognise < as markup unless it looks ok */
		if(next == buflen)
		    goto deflt;
		if(buf[next] != '!' && buf[next] != '/' && buf[next] != '?' &&
		   !is_xml_namestart(buf[next]))
		    goto deflt;
	    }
	    s->next = next;
	    if(count > 0)
	    {
		/* count+1: s->next is already past the '<' */
		require(transcribe(p, count+1, count));
	    }
	    count = 0;
	    /* A comment that is not to be returned is skipped in place and
	       the data on either side is merged */
	    if(!ParserGetFlag(p, ReturnComments) &&
	       buflen >= next + 3 &&
	       buf[next] == '!' && buf[next+1] == '-' && buf[next+2] == '-')
	    {
		s->next = next + 3;
		require(parse_comment(p, 1));
		buflen = s->line_length;
		next = s->next;
	    }
	    else
	    {
		/* Leave the '<' unread for the next call */
		s->next = next-1;
		goto done;
	    }
	    break;

	case '&':
	    if(ParserGetFlag(p, IgnoreEntities))
		goto deflt;
	    if(!ParserGetFlag(p, MergePCData) &&
	       (p->pbufnext > 0 || count > 0))
	    {
		/* We're returning references as separate bits, and we've
		   come to one, and we've already got some data to return,
		   so return what we've got and get the reference next time. */

		s->next = next-1;
		if(count > 0)
		{
		    require(transcribe(p, count, count));
		}
		goto done;
	    }
	    if(buflen >= next+1 && buf[next] == '#')
	    {
		/* It's a character reference */

		s->next = next+1;
		if(count > 0)
		{
		    /* count+2: s->next is already past the "&#" */
		    require(transcribe(p, count+2, count));
		}
		count = 0;
		require(parse_character_reference(p,
			       ParserGetFlag(p, ExpandCharacterEntities)));
		next = s->next;

		if(!ParserGetFlag(p, MergePCData))
		    goto done;
	    }
	    else
	    {
		/* It's a general entity reference */

		s->next = next;
		if(count > 0)
		{
		    /* count+1: s->next is already past the '&' */
		    require(transcribe(p, count+1, count));
		}
		count = 0;
		require(parse_reference(p, 0,
				ParserGetFlag(p, ExpandGeneralEntities), 1));
		/* The reference may have pushed a new source; reload */
		s = p->source;
		buf = s->line;
		buflen = s->line_length;
		next = s->next;

		if(!ParserGetFlag(p, MergePCData))
		    goto done;
	    }
	    break;

	case ']':
	    if(ParserGetFlag(p, XMLMiscWFErrors) &&
	       buflen >= next + 2 &&
	       buf[next] == ']' && buf[next+1] == '>')
		return error(p, "Illegal character sequence ']]>' in pcdata");
	    /* fall through */

	default:
	deflt:
	    count++;
	    break;
	}
    }

done:
    /* Terminate the collected text and hand the buffer over to the bit */
    p->pbuf[p->pbufnext++] = 0;
    p->xbit.type = XBIT_pcdata;
    p->xbit.pcdata_chars = p->pbuf;
    Consume(p->pbuf);

    return 0;
}

/* Called after reading '