/* -*- Mode: c; c-basic-offset: 2 -*- * * raptor_rdfxml.c - Raptor RDF/XML Parser * * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/ * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ * * This package is Free Software and part of Redland http://librdf.org/ * * It is licensed under the following three licenses as alternatives: * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version * 2. GNU General Public License (GPL) V2 or any newer version * 3. Apache License, V2.0 or any newer version * * You may not use this file except in compliance with at least one of * the above three licenses. * * See LICENSE.html or LICENSE.txt at the top of this package for the * complete terms and further detail along with the license texts for * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. * * */ #ifdef HAVE_CONFIG_H #include #endif #ifdef WIN32 #include #endif #include #include #include #include #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_STDLIB_H #include #endif /* Raptor includes */ #include "raptor.h" #include "raptor_internal.h" /* Define these for far too much output */ #undef RAPTOR_DEBUG_VERBOSE #undef RAPTOR_DEBUG_CDATA /* Raptor structures */ typedef enum { /* Catch uninitialised state */ RAPTOR_STATE_INVALID = 0, /* Skipping current tree of elements - used to recover finding * illegal content, when parsling permissively. */ RAPTOR_STATE_SKIPPING, /* Not in RDF grammar yet - searching for a start element. 
* * This can be (goto NODE_ELEMENT_LIST) but since it is optional, * the start element can also be one of * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementURIs * * If RDF content is assumed, go straight to OBJ */ RAPTOR_STATE_UNKNOWN, /* A list of node elements * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList */ RAPTOR_STATE_NODE_ELEMENT_LIST, /* Found an */ RAPTOR_STATE_DESCRIPTION, /* Found a property element * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt */ RAPTOR_STATE_PROPERTYELT, /* A property element that is an ordinal - rdf:li, rdf:_n */ RAPTOR_STATE_MEMBER_PROPERTYELT, /* Found a node element * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement */ RAPTOR_STATE_NODE_ELEMENT, /* A property element with rdf:parseType="Literal" * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt */ RAPTOR_STATE_PARSETYPE_LITERAL, /* A property element with rdf:parseType="Resource" * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt */ RAPTOR_STATE_PARSETYPE_RESOURCE, /* A property element with rdf:parseType="Collection" * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt * * (This also handles daml:Collection) */ RAPTOR_STATE_PARSETYPE_COLLECTION, /* A property element with a rdf:parseType attribute and a value * not "Literal" or "Resource" * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt */ RAPTOR_STATE_PARSETYPE_OTHER, RAPTOR_STATE_PARSETYPE_LAST = RAPTOR_STATE_PARSETYPE_OTHER } raptor_state; static const char * const raptor_state_names[RAPTOR_STATE_PARSETYPE_LAST+2]={ "INVALID", "SKIPPING", "UNKNOWN", "nodeElementList", "propertyElt", "Description", "propertyElt", "memberPropertyElt", "nodeElement", "parseTypeLiteral", "parseTypeResource", "parseTypeCollection", "parseTypeOther" }; static const char * raptor_rdfxml_state_as_string(raptor_state state) { if(state<1 || state > RAPTOR_STATE_PARSETYPE_LAST) state=(raptor_state)0; return raptor_state_names[(int)state]; } /* * 
RDF/XML syntax terms, properties and classes.
 * Must match names in rdf_syntax_terms_info below.
 */
/* Each value is used as an index into rdf_syntax_terms_info[] and into the
 * per-element rdf_attr[] array, so the numbering must stay dense and
 * RDF_ATTR_LAST must name the final entry.
 */
typedef enum {
  RDF_ATTR_RDF             = 0,
  RDF_ATTR_Description     = 1,
  RDF_ATTR_li              = 2,
  RDF_ATTR_about           = 3, /* value of rdf:about attribute */
  RDF_ATTR_aboutEach       = 4, /* value of rdf:aboutEach attribute */
  RDF_ATTR_aboutEachPrefix = 5, /* value of rdf:aboutEachPrefix attribute */
  RDF_ATTR_ID              = 6, /* value of rdf:ID attribute */
  RDF_ATTR_bagID           = 7, /* value of rdf:bagID attribute */
  RDF_ATTR_resource        = 8, /* value of rdf:resource attribute */
  RDF_ATTR_parseType       = 9, /* value of rdf:parseType attribute */
  RDF_ATTR_nodeID          = 10, /* value of rdf:nodeID attribute */
  RDF_ATTR_datatype        = 11, /* value of rdf:datatype attribute */

  /* rdf:Property-s */
  RDF_ATTR_type            = 12, /* value of rdf:type -- a property in RDF model */
  RDF_ATTR_value           = 13, /* value of rdf:value -- a property in RDF model */
  RDF_ATTR_subject         = 14, /* value of rdf:subject -- a property in RDF model */
  RDF_ATTR_predicate       = 15, /* value of rdf:predicate -- a property in RDF model */
  RDF_ATTR_object          = 16, /* value of rdf:object -- a property in RDF model */
  RDF_ATTR_first           = 17, /* value of rdf:first -- a property in RDF model */
  RDF_ATTR_rest            = 18, /* value of rdf:rest -- a property in RDF model */

  /* rdfs:Class-s */
  RDF_ATTR_Seq             = 19, /* value of rdf:Seq -- a class in RDF model */
  RDF_ATTR_Bag             = 20, /* value of rdf:Bag -- a class in RDF model */
  RDF_ATTR_Alt             = 21, /* value of rdf:Alt -- a class in RDF model */
  RDF_ATTR_Statement       = 22, /* value of rdf:Statement -- a class in RDF model */
  RDF_ATTR_Property        = 23, /* value of rdf:Property -- a class in RDF model */
  RDF_ATTR_List            = 24, /* value of rdf:List -- a class in RDF model */
  RDF_ATTR_XMLLiteral      = 25, /* value of rdf:XMLLiteral -- a class in RDF graph */

  /* rdfs:Resource-s */
  RDF_ATTR_nil             = 26, /* value of rdf:nil -- a resource in RDF graph */

  RDF_ATTR_LAST            = RDF_ATTR_nil
} rdf_attr;


/*
 * http://www.w3.org/TR/rdf-syntax-grammar/#section-grammar-summary
 *
 * coreSyntaxTerms := rdf:RDF | rdf:ID | rdf:about | rdf:bagID | rdf:parseType | rdf:resource | rdf:nodeID | rdf:datatype
 * syntaxTerms := coreSyntaxTerms | rdf:Description | rdf:li
 * oldTerms := rdf:aboutEach | rdf:aboutEachPrefix | rdf:bagID
 *
 *
nodeElementURIs := anyURI - ( coreSyntaxTerms | rdf:li | oldTerms ) * propertyElementURIs := anyURI - ( coreSyntaxTerms | rdf:Description | oldTerms ) * propertyAttributeURIs := anyURI - ( coreSyntaxTerms | rdf:Description | rdf:li | oldTerms ) * * So, forbidden terms in the RDF namespace are: * nodeElements * RDF | ID | about | bagID | parseType | resource | nodeID | datatype | * li | aboutEach | aboutEachPrefix | bagID * * propertyElements * RDF | ID | about | bagID | parseType | resource | nodeID | datatype | * Description | aboutEach | aboutEachPrefix | bagID * * propertyAttributes * RDF | ID | about | bagID | parseType | resource | nodeID | datatype | * Description | li | aboutEach | aboutEachPrefix | bagID * * Information about rdf attributes: * raptor_identifier_type type * Set when the attribute is a property rather than just syntax * NOTE: raptor_rdfxml_process_property_attributes() expects only * RAPTOR_IDENTIFIER_TYPE_NONE, * RAPTOR_IDENTIFIER_TYPE_LITERAL or RAPTOR_IDENTIFIER_TYPE_RESOURCE * allowed_unprefixed_on_attribute * If allowed for legacy reasons to be unprefixed as an attribute. 
* */ static const struct { const char *name; /* term name */ int forbidden_as_nodeElement; int forbidden_as_propertyElement; int forbidden_as_propertyAttribute; raptor_identifier_type type; /* statement value */ int allowed_unprefixed_on_attribute; } rdf_syntax_terms_info[]={ /* syntax only */ { "RDF", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }, { "Description", 0, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }, { "li", 1, 0, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }, { "about", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 }, { "aboutEach", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }, { "aboutEachPrefix", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }, { "ID", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 }, { "bagID", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 }, { "resource", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 }, { "parseType", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 }, { "nodeID", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }, { "datatype", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }, /* rdf:Property-s */ { "type", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_RESOURCE, 1 }, { "value", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "subject", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "predicate", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "object", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "first", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "rest", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, /* rdfs:Class-s */ { "Seq", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "Bag", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "Alt", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "Statement", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "Property", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "List", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { "XMLLiteral", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, /* rdfs:Resource-s */ { "nil", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 }, { NULL , 0, 0, 0, 
RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 } }; static int raptor_rdfxml_forbidden_nodeElement_name(const char *name) { int i; if(*name == '_') return 0; for(i=0; rdf_syntax_terms_info[i].name; i++) if(!strcmp(rdf_syntax_terms_info[i].name, name)) return rdf_syntax_terms_info[i].forbidden_as_nodeElement; return -1; } static int raptor_rdfxml_forbidden_propertyElement_name(const char *name) { int i; if(*name == '_') return 0; for(i=0; rdf_syntax_terms_info[i].name; i++) if(!strcmp(rdf_syntax_terms_info[i].name, (const char*)name)) return rdf_syntax_terms_info[i].forbidden_as_propertyElement; return -1; } static int raptor_rdfxml_forbidden_propertyAttribute_name(const char *name) { int i; if(*name == '_') return 0; for(i=0; rdf_syntax_terms_info[i].name; i++) if(!strcmp(rdf_syntax_terms_info[i].name, (const char*)name)) return rdf_syntax_terms_info[i].forbidden_as_propertyAttribute; return -1; } typedef enum { /* undetermined yet - whitespace is stored */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN, /* literal content - no elements, cdata allowed, whitespace significant * blah */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL, /* parseType literal content (WF XML) - all content preserved * blah */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL, /* top-level nodes - 0+ elements expected, no cdata, whitespace ignored, * any non-whitespace cdata is error * only used for or implict */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES, /* properties - 0+ elements expected, no cdata, whitespace ignored, * any non-whitespace cdata is error * blah blah */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES, /* property content - all content preserved * any content type changes when first non-whitespace found * ... 
*/ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT, /* resource URI given - no element, no cdata, whitespace ignored, * any non-whitespace cdata is error * * */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE, /* skipping content - all content is preserved * Used when skipping content for unknown parseType-s, * error recovery, some other reason */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED, /* parseType Collection - all content preserved * Parsing of this determined by RDF/XML (Revised) closed collection rules * ... */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION, /* Like above but handles "daml:collection" */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION, /* dummy for use in strings below */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST } raptor_rdfxml_element_content_type; static const struct { const char * name; int whitespace_significant; /* non-blank cdata */ int cdata_allowed; /* XML element content */ int element_allowed; /* Do RDF-specific processing? (property attributes, rdf: attributes, ...) 
*/ int rdf_processing; } rdf_content_type_info[RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST]={ {"Unknown", 1, 1, 1, 0 }, {"Literal", 1, 1, 0, 0 }, {"XML Literal", 1, 1, 1, 0 }, {"Nodes", 0, 0, 1, 1 }, {"Properties", 0, 1, 1, 1 }, {"Property Content",1, 1, 1, 1 }, {"Resource", 0, 0, 0, 0 }, {"Preserved", 1, 1, 1, 0 }, {"Collection", 1, 1, 1, 1 }, {"DAML Collection", 1, 1, 1, 1 }, }; static const char * raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type) { if(type > RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST) return "INVALID"; return rdf_content_type_info[type].name; } /* * Raptor Element/attributes on stack */ struct raptor_rdfxml_element_s { raptor_world* world; raptor_xml_element *xml_element; /* NULL at bottom of stack */ struct raptor_rdfxml_element_s *parent; /* attributes declared in M&S */ const unsigned char * rdf_attr[RDF_ATTR_LAST+1]; /* how many of above seen */ int rdf_attr_count; /* state that this production matches */ raptor_state state; /* how to handle the content inside this XML element */ raptor_rdfxml_element_content_type content_type; /* starting state for children of this element */ raptor_state child_state; /* starting content type for children of this element */ raptor_rdfxml_element_content_type child_content_type; /* STATIC Reified statement identifier */ raptor_identifier reified; /* STATIC Bag identifier */ raptor_identifier bag; int last_bag_ordinal; /* starts at 0, so first predicate is rdf:_1 */ /* STATIC Subject identifier (URI/anon ID), type, source * * When the XML element represents a node, this is the identifier */ raptor_identifier subject; /* STATIC Predicate URI, source is either * RAPTOR_URI_SOURCE_ELEMENT or RAPTOR_URI_SOURCE_ATTRIBUTE * * When the XML element represents a node or predicate, * this is the identifier of the predicate */ raptor_identifier predicate; /* STATIC Object identifier (URI/anon ID), type, source * * When this XML element generates a statement that needs an object, * possibly 
from a child element, this is the identifier of the object */ raptor_identifier object; /* URI of datatype of literal */ raptor_uri *object_literal_datatype; /* last ordinal used, so initialising to 0 works, emitting rdf:_1 first */ int last_ordinal; /* If this element's parseType is a Collection * this identifies the anon node of current tail of the collection(list). */ const unsigned char *tail_id; /* RDF/XML specific checks */ /* all cdata so far is whitespace */ unsigned int content_cdata_all_whitespace; }; typedef struct raptor_rdfxml_element_s raptor_rdfxml_element; #define RAPTOR_RDFXML_N_CONCEPTS 22 /* * Raptor parser object */ struct raptor_rdfxml_parser_s { raptor_sax2 *sax2; /* stack of elements - elements add after current_element */ raptor_rdfxml_element *root_element; raptor_rdfxml_element *current_element; raptor_uri* concepts[RAPTOR_RDFXML_N_CONCEPTS]; /* set of seen rdf:ID / rdf:bagID values (with in-scope base URI) */ raptor_id_set* id_set; void *xml_content; size_t xml_content_length; raptor_iostream* iostream; /* writer for building parseType="Literal" content */ raptor_xml_writer* xml_writer; }; /* static variables */ #define RAPTOR_RDF_type_URI(rdf_xml_parser) rdf_xml_parser->concepts[0] #define RAPTOR_RDF_value_URI(rdf_xml_parser) rdf_xml_parser->concepts[1] #define RAPTOR_RDF_subject_URI(rdf_xml_parser) rdf_xml_parser->concepts[2] #define RAPTOR_RDF_predicate_URI(rdf_xml_parser) rdf_xml_parser->concepts[3] #define RAPTOR_RDF_object_URI(rdf_xml_parser) rdf_xml_parser->concepts[4] #define RAPTOR_RDF_Statement_URI(rdf_xml_parser) rdf_xml_parser->concepts[5] #define RAPTOR_RDF_Seq_URI(rdf_xml_parser) rdf_xml_parser->concepts[6] #define RAPTOR_RDF_Bag_URI(rdf_xml_parser) rdf_xml_parser->concepts[7] #define RAPTOR_RDF_Alt_URI(rdf_xml_parser) rdf_xml_parser->concepts[8] #define RAPTOR_RDF_List_URI(rdf_xml_parser) rdf_xml_parser->concepts[9] #define RAPTOR_RDF_first_URI(rdf_xml_parser) rdf_xml_parser->concepts[10] #define 
RAPTOR_RDF_rest_URI(rdf_xml_parser) rdf_xml_parser->concepts[11] #define RAPTOR_RDF_nil_URI(rdf_xml_parser) rdf_xml_parser->concepts[12] #define RAPTOR_DAML_NS_URI(rdf_xml_parser) rdf_xml_parser->concepts[13] #define RAPTOR_DAML_List_URI(rdf_xml_parser) rdf_xml_parser->concepts[14] #define RAPTOR_DAML_first_URI(rdf_xml_parser) rdf_xml_parser->concepts[15] #define RAPTOR_DAML_rest_URI(rdf_xml_parser) rdf_xml_parser->concepts[16] #define RAPTOR_DAML_nil_URI(rdf_xml_parser) rdf_xml_parser->concepts[17] #define RAPTOR_RDF_RDF_URI(rdf_xml_parser) rdf_xml_parser->concepts[18] #define RAPTOR_RDF_Description_URI(rdf_xml_parser) rdf_xml_parser->concepts[19] #define RAPTOR_RDF_li_URI(rdf_xml_parser) rdf_xml_parser->concepts[20] #define RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser) rdf_xml_parser->concepts[21] /* RAPTOR_RDFXML_N_CONCEPTS defines size of array */ /* prototypes for element functions */ static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_parser); static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_parser, raptor_rdfxml_element* element); static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id); /* prototypes for grammar functions */ static void raptor_rdfxml_start_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element); static void raptor_rdfxml_end_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element); static void raptor_rdfxml_cdata_grammar(raptor_parser *parser, const unsigned char *s, int len, int is_cdata); /* prototype for statement related functions */ static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_uri *subject_uri, const unsigned char *subject_id, const raptor_identifier_type subject_type, const raptor_uri_source subject_uri_source, raptor_uri *predicate_uri, const unsigned char *predicate_id, const raptor_identifier_type predicate_type, const raptor_uri_source predicate_uri_source, int 
predicate_ordinal, raptor_uri *object_uri, const unsigned char *object_id, const raptor_identifier_type object_type, const raptor_uri_source object_uri_source, raptor_uri *literal_datatype, raptor_identifier *reified, raptor_rdfxml_element *bag_element); /* Prototypes for parsing data functions */ static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name); static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser); static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser); static int raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end); static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser); static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser); static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_xml_parser) { raptor_rdfxml_element *element=rdf_xml_parser->current_element; if(!element) return NULL; rdf_xml_parser->current_element=element->parent; if(rdf_xml_parser->root_element == element) /* just deleted root */ rdf_xml_parser->root_element=NULL; return element; } static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_xml_parser, raptor_rdfxml_element* element) { element->parent=rdf_xml_parser->current_element; rdf_xml_parser->current_element=element; if(!rdf_xml_parser->root_element) rdf_xml_parser->root_element=element; } static void raptor_free_rdfxml_element(raptor_rdfxml_element *element) { int i; /* Free special RDF M&S attributes */ for(i=0; i<= RDF_ATTR_LAST; i++) if(element->rdf_attr[i]) RAPTOR_FREE(cstring, (void*)element->rdf_attr[i]); raptor_free_identifier(&element->subject); raptor_free_identifier(&element->predicate); raptor_free_identifier(&element->object); raptor_free_identifier(&element->bag); raptor_free_identifier(&element->reified); if(element->tail_id) RAPTOR_FREE(cstring, (char*)element->tail_id); if(element->object_literal_datatype) raptor_free_uri_v2(element->world, 
element->object_literal_datatype); RAPTOR_FREE(raptor_rdfxml_element, element); } static void raptor_rdfxml_sax2_new_namespace_handler(void *user_data, raptor_namespace* nspace) { raptor_parser* rdf_parser; const unsigned char* namespace_name; size_t namespace_name_len; raptor_uri* uri=raptor_namespace_get_uri(nspace); rdf_parser=(raptor_parser*)user_data; raptor_parser_start_namespace(rdf_parser, nspace); if(!uri) return; namespace_name=raptor_uri_as_counted_string_v2(nspace->nstack->world, uri, &namespace_name_len); if(namespace_name_len == raptor_rdf_namespace_uri_len-1 && !strncmp((const char*)namespace_name, (const char*)raptor_rdf_namespace_uri, namespace_name_len)) { const unsigned char *prefix=raptor_namespace_get_prefix(nspace); raptor_parser_warning(rdf_parser, "Declaring a namespace with prefix %s to URI %s - one letter short of the RDF namespace URI and probably a mistake.", prefix, namespace_name); } if(namespace_name_len > raptor_rdf_namespace_uri_len && !strncmp((const char*)namespace_name, (const char*)raptor_rdf_namespace_uri, raptor_rdf_namespace_uri_len)) { raptor_parser_error(rdf_parser, "Declaring a namespace URI %s to which the RDF namespace URI is a prefix is forbidden.", namespace_name); } } static void raptor_rdfxml_start_element_handler(void *user_data, raptor_xml_element* xml_element) { raptor_parser* rdf_parser; raptor_rdfxml_parser* rdf_xml_parser; raptor_rdfxml_element* element; int ns_attributes_count=0; raptor_qname** named_attrs=NULL; int i; int count_bumped=0; rdf_parser=(raptor_parser*)user_data; rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; if(rdf_parser->failed) return; raptor_rdfxml_update_document_locator(rdf_parser); /* Create new element structure */ element=(raptor_rdfxml_element*)RAPTOR_CALLOC(raptor_rdfxml_element, 1, sizeof(raptor_rdfxml_element)); if(!element) { raptor_parser_fatal_error(rdf_parser, "Out of memory"); rdf_parser->failed=1; return; } element->world=rdf_parser->world; 
element->xml_element=xml_element; /* init world fields in identifiers not created with raptor_new_identifier() */ element->reified.world= element->bag.world= element->subject.world= element->predicate.world= element->object.world= rdf_parser->world; raptor_rdfxml_element_push(rdf_xml_parser, element); named_attrs=raptor_xml_element_get_attributes(xml_element); ns_attributes_count=raptor_xml_element_get_attributes_count(xml_element); /* RDF-specific processing of attributes */ if(ns_attributes_count) { raptor_qname** new_named_attrs; int offset = 0; raptor_rdfxml_element* parent_element; parent_element=element->parent; /* Allocate new array to move namespaced-attributes to if * rdf processing is performed */ new_named_attrs=(raptor_qname**)RAPTOR_CALLOC(raptor_qname_array, ns_attributes_count, sizeof(raptor_qname*)); if(!new_named_attrs) { raptor_parser_fatal_error(rdf_parser, "Out of memory"); rdf_parser->failed=1; return; } for (i = 0; i < ns_attributes_count; i++) { raptor_qname* attr=named_attrs[i]; /* If: * 1 We are handling RDF content and RDF processing is allowed on * this element * OR * 2 We are not handling RDF content and * this element is at the top level (top level Desc. / typedNode) * i.e. 
we have no parent * then handle the RDF attributes */ if((parent_element && rdf_content_type_info[parent_element->child_content_type].rdf_processing) || !parent_element) { /* Save pointers to some RDF M&S attributes */ /* If RDF namespace-prefixed attributes */ if(attr->nspace && attr->nspace->is_rdf_ms) { const unsigned char *attr_name=attr->local_name; int j; for(j=0; j<= RDF_ATTR_LAST; j++) if(!strcmp((const char*)attr_name, rdf_syntax_terms_info[j].name)) { element->rdf_attr[j]=attr->value; element->rdf_attr_count++; /* Delete it if it was stored elsewhere */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Found RDF namespace attribute '%s' URI %s\n", (char*)attr_name, attr->value); #endif /* make sure value isn't deleted from qname structure */ attr->value=NULL; raptor_free_qname(attr); attr=NULL; break; } } /* end if RDF namespaced-prefixed attributes */ if(!attr) continue; /* If non namespace-prefixed RDF attributes found on an element */ if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES] && !attr->nspace) { const unsigned char *attr_name=attr->local_name; int j; for(j=0; j<= RDF_ATTR_LAST; j++) if(!strcmp((const char*)attr_name, rdf_syntax_terms_info[j].name)) { element->rdf_attr[j]=attr->value; element->rdf_attr_count++; if(!rdf_syntax_terms_info[j].allowed_unprefixed_on_attribute) raptor_parser_warning(rdf_parser, "Using rdf attribute '%s' without the RDF namespace has been deprecated.", attr_name); /* Delete it if it was stored elsewhere */ /* make sure value isn't deleted from qname structure */ attr->value=NULL; raptor_free_qname(attr); attr=NULL; break; } } /* end if non-namespace prefixed RDF attributes */ if(!attr) continue; } /* end if leave literal XML alone */ if(attr) new_named_attrs[offset++]=attr; } /* new attribute count is set from attributes that haven't been skipped */ ns_attributes_count=offset; if(!ns_attributes_count) { /* all attributes were deleted so delete the new array */ RAPTOR_FREE(raptor_qname_array, new_named_attrs); 
new_named_attrs=NULL; } RAPTOR_FREE(raptor_qname_array, named_attrs); named_attrs=new_named_attrs; raptor_xml_element_set_attributes(xml_element, named_attrs, ns_attributes_count); } /* end if ns_attributes_count */ /* start from unknown; if we have a parent, it may set this */ element->state=RAPTOR_STATE_UNKNOWN; element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN; if(element->parent && element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN) { element->content_type=element->parent->child_content_type; if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE && element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION && element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { /* If parent has an rdf:resource, this element should not be here */ raptor_parser_error(rdf_parser, "property element '%s' has multiple object node elements, skipping.", raptor_xml_element_get_name(element->parent->xml_element)->local_name); element->state=RAPTOR_STATE_SKIPPING; element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; } else { if(!element->parent->child_state) { raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_start_element_handler: no parent element child_state set"); return; } element->state=element->parent->child_state; element->parent->xml_element->content_element_seen++; count_bumped++; /* leave literal XML alone */ if (!rdf_content_type_info[element->content_type].cdata_allowed) { if(element->parent->xml_element->content_element_seen && element->parent->xml_element->content_cdata_seen) { /* Uh oh - mixed content, the parent element has cdata too */ raptor_parser_warning(rdf_parser, "element '%s' has mixed content.", raptor_xml_element_get_name(element->parent->xml_element)->local_name); } /* If there is some existing all-whitespace content cdata * before this node element, delete it */ if(element->parent->content_type == 
RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES && element->parent->xml_element->content_element_seen && element->parent->content_cdata_all_whitespace && element->parent->xml_element->content_cdata_length) { element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; raptor_free_stringbuffer(element->parent->xml_element->content_cdata_sb); element->parent->xml_element->content_cdata_sb=NULL; element->parent->xml_element->content_cdata_length=0; } } /* end if leave literal XML alone */ } /* end if parent has no rdf:resource */ } /* end if element->parent */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Using content type %s\n", rdf_content_type_info[element->content_type].name); fprintf(stderr, "raptor_rdfxml_start_element_handler: Start ns-element: "); raptor_print_xml_element(xml_element, stderr); #endif /* Check for non namespaced stuff when not in a parseType literal, other */ if (rdf_content_type_info[element->content_type].rdf_processing) { /* The element */ /* If has no namespace or the namespace has no name (xmlns="") */ if(!raptor_xml_element_get_name(xml_element)->nspace || (raptor_xml_element_get_name(xml_element)->nspace && !raptor_namespace_get_uri(raptor_xml_element_get_name(xml_element)->nspace))) { raptor_parser_error(rdf_parser, "Using an element '%s' without a namespace is forbidden.", raptor_xml_element_get_name(element->parent->xml_element)->local_name); element->state=RAPTOR_STATE_SKIPPING; /* Remove count above so that parent thinks this is empty */ if(count_bumped) element->parent->xml_element->content_element_seen--; element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; } /* Check for any remaining non-namespaced attributes */ if (named_attrs) { for(i=0; i < ns_attributes_count; i++) { raptor_qname *attr=named_attrs[i]; /* Check if any attributes are non-namespaced */ if(!attr->nspace || (attr->nspace && !raptor_namespace_get_uri(attr->nspace))) { raptor_parser_error(rdf_parser, "Using an attribute '%s' without a 
namespace is forbidden.", attr->local_name); raptor_free_qname(attr); named_attrs[i]=NULL; } } } } if (element->rdf_attr[RDF_ATTR_aboutEach] || element->rdf_attr[RDF_ATTR_aboutEachPrefix]) { raptor_parser_warning(rdf_parser, "element '%s' has aboutEach / aboutEachPrefix, skipping.", raptor_xml_element_get_name(xml_element)->local_name); element->state=RAPTOR_STATE_SKIPPING; /* Remove count above so that parent thinks this is empty */ if(count_bumped) element->parent->xml_element->content_element_seen--; element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; } /* Right, now ready to enter the grammar */ raptor_rdfxml_start_element_grammar(rdf_parser, element); return; } static void raptor_rdfxml_end_element_handler(void *user_data, raptor_xml_element* xml_element) { raptor_parser* rdf_parser; raptor_rdfxml_parser* rdf_xml_parser; raptor_rdfxml_element* element; rdf_parser=(raptor_parser*)user_data; rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; if(!rdf_parser->failed) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_rdfxml_end_element_grammar(rdf_parser, rdf_xml_parser->current_element); } element=raptor_rdfxml_element_pop(rdf_xml_parser); if(element) { if(element->parent) { /* Do not change this; PROPERTYELT will turn into MEMBER if necessary * See the switch case for MEMBER / PROPERTYELT where the test is done. * * PARSETYPE_RESOURCE should never be propogated up since it * will turn the next child (node) element into a property */ if(element->state != RAPTOR_STATE_MEMBER_PROPERTYELT && element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) element->parent->child_state=element->state; } raptor_free_rdfxml_element(element); } } /* cdata (and ignorable whitespace for libxml). * s is not 0 terminated for expat, is for libxml - grrrr. 
*/ static void raptor_rdfxml_characters_handler(void *user_data, raptor_xml_element* xml_element, const unsigned char *s, int len) { raptor_parser* rdf_parser=(raptor_parser*)user_data; raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 0); } /* cdata (and ignorable whitespace for libxml). * s is not 0 terminated for expat, is for libxml - grrrr. */ static void raptor_rdfxml_cdata_handler(void *user_data, raptor_xml_element* xml_element, const unsigned char *s, int len) { raptor_parser* rdf_parser=(raptor_parser*)user_data; raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 1); } /* comment handler * s is 0 terminated */ static void raptor_rdfxml_comment_handler(void *user_data, raptor_xml_element* xml_element, const unsigned char *s) { raptor_parser* rdf_parser=(raptor_parser*)user_data; raptor_rdfxml_parser* rdf_xml_parser; raptor_rdfxml_element* element; if(rdf_parser->failed || !xml_element) return; rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; element=rdf_xml_parser->current_element; if(element) { if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL) raptor_xml_writer_comment(rdf_xml_parser->xml_writer, s); } #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("XML Comment '%s'\n", s); #endif } static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name) { raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; raptor_sax2* sax2; raptor_world* world=rdf_parser->world; /* Allocate sax2 object */ sax2=raptor_new_sax2(rdf_parser, &rdf_parser->error_handlers); rdf_xml_parser->sax2=sax2; if(!sax2) return 1; /* Initialize sax2 element handlers */ raptor_sax2_set_start_element_handler(sax2, raptor_rdfxml_start_element_handler); raptor_sax2_set_end_element_handler(sax2, raptor_rdfxml_end_element_handler); raptor_sax2_set_characters_handler(sax2, raptor_rdfxml_characters_handler); raptor_sax2_set_cdata_handler(sax2, raptor_rdfxml_cdata_handler); raptor_sax2_set_comment_handler(sax2, 
raptor_rdfxml_comment_handler); raptor_sax2_set_namespace_handler(sax2, raptor_rdfxml_sax2_new_namespace_handler); /* Allocate uris */ RAPTOR_RDF_type_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "type"); RAPTOR_RDF_value_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "value"); RAPTOR_RDF_subject_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "subject"); RAPTOR_RDF_predicate_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "predicate"); RAPTOR_RDF_object_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "object"); RAPTOR_RDF_Statement_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Statement"); RAPTOR_RDF_Seq_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Seq"); RAPTOR_RDF_Bag_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Bag"); RAPTOR_RDF_Alt_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Alt"); RAPTOR_RDF_List_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "List"); RAPTOR_RDF_first_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "first"); RAPTOR_RDF_rest_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "rest"); RAPTOR_RDF_nil_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "nil"); RAPTOR_DAML_NS_URI(rdf_xml_parser)=raptor_new_uri_v2(world, (const unsigned char*)"http://www.daml.org/2001/03/daml+oil#"); RAPTOR_DAML_List_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"List"); RAPTOR_DAML_first_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser) ,(const unsigned char *)"first"); RAPTOR_DAML_rest_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"rest"); RAPTOR_DAML_nil_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char 
*)"nil"); RAPTOR_RDF_RDF_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "RDF"); RAPTOR_RDF_Description_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Description"); RAPTOR_RDF_li_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "li"); RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)=raptor_new_uri_v2(world, raptor_xml_literal_datatype_uri_string); /* Check for uri allocation failures */ if(!RAPTOR_RDF_type_URI(rdf_xml_parser) || !RAPTOR_RDF_value_URI(rdf_xml_parser) || !RAPTOR_RDF_subject_URI(rdf_xml_parser) || !RAPTOR_RDF_predicate_URI(rdf_xml_parser) || !RAPTOR_RDF_object_URI(rdf_xml_parser) || !RAPTOR_RDF_Statement_URI(rdf_xml_parser) || !RAPTOR_RDF_Seq_URI(rdf_xml_parser) || !RAPTOR_RDF_Bag_URI(rdf_xml_parser) || !RAPTOR_RDF_Alt_URI(rdf_xml_parser) || !RAPTOR_RDF_List_URI(rdf_xml_parser) || !RAPTOR_RDF_first_URI(rdf_xml_parser) || !RAPTOR_RDF_rest_URI(rdf_xml_parser) || !RAPTOR_RDF_nil_URI(rdf_xml_parser) || !RAPTOR_DAML_NS_URI(rdf_xml_parser) || !RAPTOR_DAML_List_URI(rdf_xml_parser) || !RAPTOR_DAML_first_URI(rdf_xml_parser) || !RAPTOR_DAML_rest_URI(rdf_xml_parser) || !RAPTOR_DAML_nil_URI(rdf_xml_parser) || !RAPTOR_RDF_RDF_URI(rdf_xml_parser) || !RAPTOR_RDF_Description_URI(rdf_xml_parser) || !RAPTOR_RDF_li_URI(rdf_xml_parser) || !RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)) return 1; /* Everything succeeded */ return 0; } static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser) { raptor_uri *uri=rdf_parser->base_uri; raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; /* base URI required for RDF/XML */ if(!uri) return 1; /* Optionally normalize language to lowercase * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier */ raptor_sax2_set_feature(rdf_xml_parser->sax2, RAPTOR_FEATURE_NORMALIZE_LANGUAGE, rdf_parser->features[RAPTOR_FEATURE_NORMALIZE_LANGUAGE]); /* Optionally forbid network requests in the XML parser */ raptor_sax2_set_feature(rdf_xml_parser->sax2, RAPTOR_FEATURE_NO_NET, 
rdf_parser->features[RAPTOR_FEATURE_NO_NET]); raptor_sax2_parse_start(rdf_xml_parser->sax2, uri); /* Delete any existing id_set */ if(rdf_xml_parser->id_set) { raptor_free_id_set(rdf_xml_parser->id_set); rdf_xml_parser->id_set = NULL; } /* Create a new id_set if needed */ if(rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID]) { rdf_xml_parser->id_set = raptor_new_id_set(rdf_parser->world); if(!rdf_xml_parser->id_set) return 1; } return 0; } static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser) { raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; raptor_rdfxml_element* element; int i; if(rdf_xml_parser->sax2) { raptor_free_sax2(rdf_xml_parser->sax2); rdf_xml_parser->sax2=NULL; } while( (element=raptor_rdfxml_element_pop(rdf_xml_parser)) ) raptor_free_rdfxml_element(element); for(i=0; i< RAPTOR_RDFXML_N_CONCEPTS; i++) { raptor_uri* concept_uri=rdf_xml_parser->concepts[i]; if(concept_uri) { raptor_free_uri_v2(rdf_parser->world, concept_uri); rdf_xml_parser->concepts[i]=NULL; } } if(rdf_xml_parser->id_set) { raptor_free_id_set(rdf_xml_parser->id_set); rdf_xml_parser->id_set=NULL; } } static int raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory, const unsigned char *buffer, size_t len, const unsigned char *identifier, const unsigned char *suffix, const char *mime_type) { int score= 0; if(suffix) { if(!strcmp((const char*)suffix, "rdf") || !strcmp((const char*)suffix, "rdfs") || !strcmp((const char*)suffix, "foaf") || !strcmp((const char*)suffix, "doap") || !strcmp((const char*)suffix, "owl") || !strcmp((const char*)suffix, "daml")) score=9; if(!strcmp((const char*)suffix, "rss")) score=3; } if(identifier) { if(strstr((const char*)identifier, "rss1")) score+=5; else if(!suffix && strstr((const char*)identifier, "rss")) score+=3; else if(!suffix && strstr((const char*)identifier, "rdf")) score+=2; else if(!suffix && strstr((const char*)identifier, "RDF")) score+=2; } if(mime_type) { if(strstr((const 
char*)mime_type, "html")) score-= 4; else if(!strcmp((const char*)mime_type, "text/rdf")) score+= 7; else if(!strcmp((const char*)mime_type, "application/xml")) score+= 5; } if(buffer && len) { /* Check it's an XML namespace declared and not N3 or Turtle which * mention the namespace URI but not in this form. */ #define HAS_RDF_XMLNS1 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) #define HAS_RDF_XMLNS2 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) #define HAS_RDF_XMLNS3 (raptor_memstr((const char*)buffer, len, "xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) #define HAS_RDF_XMLNS4 (raptor_memstr((const char*)buffer, len, "xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) #define HAS_RDF_ENTITY1 (raptor_memstr((const char*)buffer, len, "") != NULL) #define HAS_RDF_ENTITY2 (raptor_memstr((const char*)buffer, len, "") != NULL) #define HAS_RDF_ENTITY3 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"&rdf;\"") != NULL) #define HAS_RDF_ENTITY4 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='&rdf;'") != NULL) #define HAS_HTML_NS (raptor_memstr((const char*)buffer, len, "http://www.w3.org/1999/xhtml") != NULL) #define HAS_HTML_ROOT (raptor_memstr((const char*)buffer, len, "context; if(rdf_parser->failed) return 1; return raptor_sax2_parse_chunk(rdf_xml_parser->sax2, buffer, len, is_end); } static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_uri *subject_uri, const unsigned char *subject_id, const raptor_identifier_type subject_type, const raptor_uri_source subject_uri_source, raptor_uri *predicate_uri, const unsigned char *predicate_id, raptor_identifier_type predicate_type, const raptor_uri_source predicate_uri_source, int predicate_ordinal, raptor_uri *object_uri, const unsigned char *object_id, const raptor_identifier_type object_type, const raptor_uri_source object_uri_source, 
raptor_uri *literal_datatype, raptor_identifier *reified, raptor_rdfxml_element* bag_element) { raptor_statement *statement=&rdf_parser->statement; const unsigned char *language=NULL; static const char empty_literal[1]=""; raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; char *reified_id=NULL; raptor_uri* uri1=NULL; raptor_uri* uri2=NULL; if(rdf_parser->failed) return; if((object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL || object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) && !literal_datatype) { language=raptor_sax2_inscope_xml_language(rdf_xml_parser->sax2); if(!object_uri) object_uri=(raptor_uri*)empty_literal; } statement->subject=subject_uri ? (void*)subject_uri : (void*)subject_id; statement->subject_type=subject_type; statement->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; if(predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) { /* new URI object */ uri1=raptor_new_uri_from_rdf_ordinal(rdf_parser->world, predicate_ordinal); predicate_uri=uri1; predicate_id=NULL; predicate_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE; } statement->predicate=predicate_uri; statement->object=object_uri ? (void*)object_uri : (void*)object_id; statement->object_type=object_type; statement->object_literal_language=language; statement->object_literal_datatype=literal_datatype; #ifdef RAPTOR_DEBUG_VERBOSE fprintf(stderr, "raptor_rdfxml_generate_statement: Generating statement: "); raptor_print_statement(statement, stderr); fputc('\n', stderr); if(!(subject_uri||subject_id)) RAPTOR_FATAL1("Statement has no subject\n"); if(!(predicate_uri||predicate_id)) RAPTOR_FATAL1("Statement has no predicate\n"); if(!(object_uri||object_id)) RAPTOR_FATAL1("Statement has no object\n"); #endif if(!rdf_parser->statement_handler) goto generate_tidy; /* Generate the statement; or is it fact? 
*/ (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); /* the bagID mess */ if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID] && bag_element && (bag_element->bag.uri || bag_element->bag.id)) { raptor_identifier* bag=&bag_element->bag; statement->subject=bag->uri ? (void*)bag->uri : (void*)bag->id; statement->subject_type=bag->type; bag_element->last_bag_ordinal++; /* new URI object */ uri2=raptor_new_uri_from_rdf_ordinal(rdf_parser->world, bag_element->last_bag_ordinal); statement->predicate=uri2; if(reified && (reified->uri || reified->id)) { statement->object=reified->uri ? (void*)reified->uri : (void*)reified->id; statement->object_type=reified->type; } else { /* reified may be NULL so do not use it */ reified_id=(char*)raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL); statement->object=reified_id; statement->object_type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS; } (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); } else if(!reified || (!reified->uri && !reified->id)) goto generate_tidy; /* generate reified statements */ statement->subject_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; statement->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; statement->object_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; statement->object_literal_language=NULL; if(reified_id) { /* reified may be NULL so do not use it */ statement->subject=reified_id; statement->subject_type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS; } else { statement->subject=reified->uri ? (void*)reified->uri : (void*)reified->id; statement->subject_type=reified->type; } statement->predicate=RAPTOR_RDF_type_URI(rdf_xml_parser); statement->object=RAPTOR_RDF_Statement_URI(rdf_xml_parser); (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); statement->predicate=RAPTOR_RDF_subject_URI(rdf_xml_parser); statement->object=subject_uri ? 
(void*)subject_uri : (void*)subject_id; statement->object_type=subject_type; (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); statement->predicate=RAPTOR_RDF_predicate_URI(rdf_xml_parser); statement->object=predicate_uri ? (void*)predicate_uri : (void*)predicate_id; statement->object_type=predicate_type; (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); statement->predicate=RAPTOR_RDF_object_URI(rdf_xml_parser); statement->object=object_uri ? (void*)object_uri : (void*)object_id; statement->object_type=object_type; statement->object_literal_language=language; (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); generate_tidy: /* Tidy up things allocated here */ if(reified_id) RAPTOR_FREE(cstring, reified_id); if(uri1) raptor_free_uri_v2(rdf_parser->world, uri1); if(uri2) raptor_free_uri_v2(rdf_parser->world, uri2); } /** * raptor_rdfxml_element_has_property_attributes: * @element: element with the property attributes * * Return true if the element has at least one property attribute. * **/ static int raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element *element) { int i; if(element->xml_element->attribute_count >0) return 1; /* look for rdf: properties */ for(i=0; i<= RDF_ATTR_LAST; i++) { if(element->rdf_attr[i] && rdf_syntax_terms_info[i].type != RAPTOR_IDENTIFIER_TYPE_UNKNOWN) return 1; } return 0; } /** * raptor_rdfxml_process_property_attributes: * @rdf_parser: Raptor parser object * @attributes_element: element with the property attributes * @resource_element: element that defines the resource URI * subject_uri, subject_uri_source etc. * @property_node_identifier: Use this identifier for the resource URI * and count any ordinals for it locally * * Process the property attributes for an element for a given resource. 
* **/ static void raptor_rdfxml_process_property_attributes(raptor_parser *rdf_parser, raptor_rdfxml_element *attributes_element, raptor_rdfxml_element *resource_element, raptor_identifier *property_node_identifier) { unsigned int i; raptor_identifier *resource_identifier; resource_identifier=property_node_identifier ? property_node_identifier : &resource_element->subject; /* Process attributes as propAttr* = * (propName="string")* */ for(i=0; i < attributes_element->xml_element->attribute_count; i++) { raptor_qname* attr=attributes_element->xml_element->attributes[i]; const unsigned char *name; const unsigned char *value; int handled=0; if(!attr) continue; name=attr->local_name; value = attr->value; if(!attr->nspace) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_error(rdf_parser, "Using property attribute '%s' without a namespace is forbidden.", name); continue; } if(!raptor_utf8_is_nfc(value, strlen((const char*)value))) { const char *message="Property attribute '%s' has a string not in Unicode Normal Form C: %s"; raptor_rdfxml_update_document_locator(rdf_parser); if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL]) raptor_parser_error(rdf_parser, message, name, value); else raptor_parser_warning(rdf_parser, message, name, value); continue; } /* Generate the property statement using one of these properties: * 1) rdf:_n * 2) the URI from the rdf:* attribute where allowed * 3) otherwise forbidden (including rdf:li) */ if(attr->nspace->is_rdf_ms) { /* is rdf: namespace */ int ordinal=0; if(*name == '_') { /* recognise rdf:_ */ name++; ordinal=raptor_check_ordinal(name); if(ordinal < 1) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_error(rdf_parser, "Illegal ordinal value %d in property attribute '%s' seen on containing element '%s'.", ordinal, attr->local_name, name); ordinal=1; } } else { raptor_rdfxml_update_document_locator(rdf_parser); if(raptor_rdfxml_forbidden_propertyAttribute_name((const char*)name) > 0) 
raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", name); else raptor_parser_warning(rdf_parser, "Unknown RDF namespace property attribute '%s'.", name); } if(ordinal >= 1) { /* Generate an ordinal property when there are no problems */ raptor_rdfxml_generate_statement(rdf_parser, resource_identifier->uri, resource_identifier->id, resource_identifier->type, resource_identifier->uri_source, NULL, NULL, RAPTOR_IDENTIFIER_TYPE_ORDINAL, RAPTOR_URI_SOURCE_NOT_URI, ordinal, (raptor_uri*)value, NULL, RAPTOR_IDENTIFIER_TYPE_LITERAL, RAPTOR_URI_SOURCE_NOT_URI, NULL, NULL, /* Property attributes are never reified*/ resource_element); handled=1; } } /* end is RDF namespace property */ if(!handled) /* else not rdf: namespace or unknown in rdf: namespace so * generate a statement with a literal object */ raptor_rdfxml_generate_statement(rdf_parser, resource_identifier->uri, resource_identifier->id, resource_identifier->type, resource_identifier->uri_source, attr->uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_ATTRIBUTE, 0, (raptor_uri*)value, NULL, RAPTOR_IDENTIFIER_TYPE_LITERAL, RAPTOR_URI_SOURCE_NOT_URI, NULL, NULL, /* Property attributes are never reified*/ resource_element); } /* end for ... 
attributes */ /* Handle rdf property attributes * (only rdf:type and rdf:value at present) */ for(i=0; i<= RDF_ATTR_LAST; i++) { const unsigned char *value=attributes_element->rdf_attr[i]; int object_is_literal=(rdf_syntax_terms_info[i].type == RAPTOR_IDENTIFIER_TYPE_LITERAL); raptor_uri *property_uri, *object_uri; raptor_identifier_type object_type; if(!value) continue; if(rdf_syntax_terms_info[i].type == RAPTOR_IDENTIFIER_TYPE_UNKNOWN) { const char *name=rdf_syntax_terms_info[i].name; if(raptor_rdfxml_forbidden_propertyAttribute_name(name)) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", name); continue; } } if(object_is_literal && !raptor_utf8_is_nfc(value, strlen((const char*)value))) { const char *message="Property attribute '%s' has a string not in Unicode Normal Form C: %s"; raptor_rdfxml_update_document_locator(rdf_parser); if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL]) raptor_parser_error(rdf_parser, message, rdf_syntax_terms_info[i].name, value); else raptor_parser_warning(rdf_parser, message, rdf_syntax_terms_info[i].name, value); continue; } property_uri=raptor_new_uri_for_rdf_concept_v2(rdf_parser->world, (rdf_syntax_terms_info[i].name)); object_uri=object_is_literal ? (raptor_uri*)value : raptor_new_uri_relative_to_base_v2(rdf_parser->world, raptor_rdfxml_inscope_base_uri(rdf_parser), value); object_type=object_is_literal ? 
RAPTOR_IDENTIFIER_TYPE_LITERAL : RAPTOR_IDENTIFIER_TYPE_RESOURCE; raptor_rdfxml_generate_statement(rdf_parser, resource_identifier->uri, resource_identifier->id, resource_identifier->type, resource_identifier->uri_source, property_uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_ATTRIBUTE, 0, object_uri, NULL, object_type, RAPTOR_URI_SOURCE_NOT_URI, NULL, NULL, /* Property attributes are never reified*/ resource_element); if(!object_is_literal) raptor_free_uri_v2(rdf_parser->world, object_uri); raptor_free_uri_v2(rdf_parser->world, property_uri); } /* end for rdf:property values */ } static void raptor_rdfxml_start_element_grammar(raptor_parser *rdf_parser, raptor_rdfxml_element *element) { int finished; raptor_state state; raptor_xml_element* xml_element=element->xml_element; const unsigned char *el_name=raptor_xml_element_get_name(xml_element)->local_name; int element_in_rdf_ns=(raptor_xml_element_get_name(xml_element)->nspace && raptor_xml_element_get_name(xml_element)->nspace->is_rdf_ms); raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; int rc=0; raptor_uri* base_uri; state=element->state; #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state)); #endif base_uri=raptor_rdfxml_inscope_base_uri(rdf_parser); finished= 0; while(!finished) { switch(state) { case RAPTOR_STATE_SKIPPING: element->child_state=state; element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; finished=1; break; case RAPTOR_STATE_UNKNOWN: /* found ? 
*/ if(element_in_rdf_ns) { if(raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_RDF_URI(rdf_xml_parser))) { element->child_state=RAPTOR_STATE_NODE_ELEMENT_LIST; element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES; /* Yes - need more content before can continue, * so wait for another element */ finished=1; break; } if(raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_Description_URI(rdf_xml_parser))) { state=RAPTOR_STATE_DESCRIPTION; element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; /* Yes - found something so move immediately to description */ break; } if(element_in_rdf_ns && (rc=raptor_rdfxml_forbidden_nodeElement_name((const char*)el_name))) { if(rc > 0) { raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } else raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name); } } /* If scanning for element, can continue */ if(rdf_parser->features[RAPTOR_FEATURE_SCANNING]) { finished=1; break; } /* Otherwise the choice of the next state can be made * from the current element by the OBJ state */ state=RAPTOR_STATE_NODE_ELEMENT_LIST; element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES; break; case RAPTOR_STATE_NODE_ELEMENT_LIST: /* Handling * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList * * Everything goes to nodeElement */ state=RAPTOR_STATE_NODE_ELEMENT; element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; break; case RAPTOR_STATE_DESCRIPTION: case RAPTOR_STATE_NODE_ELEMENT: case RAPTOR_STATE_PARSETYPE_RESOURCE: case RAPTOR_STATE_PARSETYPE_COLLECTION: /* Handling or other node element * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement * * or a property element acting as a node element for * rdf:parseType="Resource" * 
http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt * or rdf:parseType="Collection" (and daml:Collection) * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt * * Only create a bag if bagID given */ if(!raptor_xml_element_get_name(xml_element)->uri) { /* We cannot handle this */ raptor_parser_warning(rdf_parser, "Using node element '%s' without a namespace is forbidden.", raptor_xml_element_get_name(xml_element)->local_name); raptor_rdfxml_update_document_locator(rdf_parser); element->state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } if(element_in_rdf_ns && (rc = raptor_rdfxml_forbidden_nodeElement_name((const char*)el_name))) { if(rc > 0) { raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name); state=RAPTOR_STATE_SKIPPING; element->state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } else raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name); } if(element->content_type !=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION && element->content_type !=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION && element->parent && (element->parent->state == RAPTOR_STATE_PROPERTYELT || element->parent->state == RAPTOR_STATE_MEMBER_PROPERTYELT) && element->parent->xml_element->content_element_seen > 1) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_error(rdf_parser, "The enclosing property already has an object"); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } if(state == RAPTOR_STATE_NODE_ELEMENT || state == RAPTOR_STATE_DESCRIPTION || state == RAPTOR_STATE_PARSETYPE_COLLECTION) { if(element_in_rdf_ns && raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_Description_URI(rdf_xml_parser))) state=RAPTOR_STATE_DESCRIPTION; else state=RAPTOR_STATE_NODE_ELEMENT; } 
if((element->rdf_attr[RDF_ATTR_ID]!=NULL) + (element->rdf_attr[RDF_ATTR_about]!=NULL) + (element->rdf_attr[RDF_ATTR_nodeID]!=NULL)>1) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_error(rdf_parser, "Multiple attributes of rdf:ID, rdf:about and rdf:nodeID on element '%s' - only one allowed.", el_name); } if(element->rdf_attr[RDF_ATTR_ID]) { element->subject.id=element->rdf_attr[RDF_ATTR_ID]; element->rdf_attr[RDF_ATTR_ID]=NULL; element->subject.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->subject.id); if(!element->subject.uri) goto oom; element->subject.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; element->subject.uri_source=RAPTOR_URI_SOURCE_ID; if(!raptor_valid_xml_ID(rdf_parser, element->subject.id)) { raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", element->subject.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } if(raptor_rdfxml_record_ID(rdf_parser, element, element->subject.id)) { raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", element->subject.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } } else if (element->rdf_attr[RDF_ATTR_about]) { element->subject.uri=raptor_new_uri_relative_to_base_v2(rdf_parser->world, base_uri, (const unsigned char*)element->rdf_attr[RDF_ATTR_about]); RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_about]); element->rdf_attr[RDF_ATTR_about]=NULL; if(!element->subject.uri) goto oom; element->subject.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; element->subject.uri_source=RAPTOR_URI_SOURCE_URI; } else if (element->rdf_attr[RDF_ATTR_nodeID]) { element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, (unsigned char*)element->rdf_attr[RDF_ATTR_nodeID]); element->rdf_attr[RDF_ATTR_nodeID]=NULL; if(!element->subject.id) goto oom; element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS; 
element->subject.uri_source=RAPTOR_URI_SOURCE_BLANK_ID; if(!raptor_valid_xml_ID(rdf_parser, element->subject.id)) { raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", element->subject.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } } else if (element->parent && element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION && element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION && (element->parent->object.uri || element->parent->object.id)) { /* copy from parent (property element), it has a URI for us */ raptor_copy_identifier(&element->subject, &element->parent->object); } else { element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL); if(!element->subject.id) goto oom; element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS; element->subject.uri_source=RAPTOR_URI_SOURCE_GENERATED; } if(element->rdf_attr[RDF_ATTR_bagID]) { if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) { element->bag.id=element->rdf_attr[RDF_ATTR_bagID]; element->rdf_attr[RDF_ATTR_bagID]=NULL; element->bag.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->bag.id); if(!element->bag.uri) goto oom; element->bag.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; element->bag.uri_source=RAPTOR_URI_SOURCE_GENERATED; if(!raptor_valid_xml_ID(rdf_parser, element->bag.id)) { raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", element->bag.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } if(raptor_rdfxml_record_ID(rdf_parser, element, element->bag.id)) { raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", element->bag.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated."); raptor_rdfxml_generate_statement(rdf_parser, element->bag.uri, element->bag.id, 
element->bag.type, element->bag.uri_source, RAPTOR_RDF_type_URI(rdf_xml_parser), NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_URI, 0, RAPTOR_RDF_Bag_URI(rdf_xml_parser), NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_NOT_URI, NULL, NULL, NULL); } else { /* bagID forbidden */ raptor_parser_error(rdf_parser, "rdf:bagID is forbidden."); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } } if(element->parent) { /* In a rdf:parseType="Collection" the resources are appended * to the list at the genid element->parent->tail_id */ if (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION || element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { const unsigned char * idList = raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL); /* rdf:type rdf:List */ raptor_uri *collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_List_URI(rdf_xml_parser) : RAPTOR_RDF_List_URI(rdf_xml_parser); if(!idList) goto oom; if((element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) || rdf_parser->features[RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST]) raptor_rdfxml_generate_statement(rdf_parser, NULL, idList, RAPTOR_IDENTIFIER_TYPE_ANONYMOUS, RAPTOR_URI_SOURCE_ID, RAPTOR_RDF_type_URI(rdf_xml_parser), NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_URI, 0, collection_uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_URI, NULL, NULL, element); collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? 
RAPTOR_DAML_first_URI(rdf_xml_parser) : RAPTOR_RDF_first_URI(rdf_xml_parser); /* rdf:first uri> */ raptor_rdfxml_generate_statement(rdf_parser, NULL, idList, RAPTOR_IDENTIFIER_TYPE_ANONYMOUS, RAPTOR_URI_SOURCE_ID, collection_uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_URI, 0, element->subject.uri, element->subject.id, element->subject.type, element->subject.uri_source, NULL, NULL, NULL); /* If there is no rdf:parseType="Collection" */ if (!element->parent->tail_id) { int len; unsigned char *new_id; /* Free any existing object URI still around * I suspect this can never happen. */ if(element->parent->object.uri) raptor_free_uri_v2(rdf_parser->world, element->parent->object.uri); len=strlen((char*)idList); new_id=(unsigned char*)RAPTOR_MALLOC(cstring, len+1); if(!len) { if(new_id) RAPTOR_FREE(cstring, new_id); return; } strncpy((char*)new_id, (char*)idList, len+1); element->parent->object.id=new_id; element->parent->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS; element->parent->object.uri_source=RAPTOR_URI_SOURCE_ID; } else { collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? 
RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_xml_parser); /* _:tail_id rdf:rest _:listRest */ raptor_rdfxml_generate_statement(rdf_parser, NULL, element->parent->tail_id, RAPTOR_IDENTIFIER_TYPE_ANONYMOUS, RAPTOR_URI_SOURCE_ID, collection_uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_URI, 0, NULL, idList, RAPTOR_IDENTIFIER_TYPE_ANONYMOUS, RAPTOR_URI_SOURCE_ID, NULL, NULL, NULL); } /* update new tail */ if(element->parent->tail_id) RAPTOR_FREE(cstring, (char*)element->parent->tail_id); element->parent->tail_id=idList; } else if(element->parent->state != RAPTOR_STATE_UNKNOWN && element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) { /* If there is a parent element (property) containing this * element (node) and it has no object, set it from this subject */ if(element->parent->object.uri) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_error(rdf_parser, "Tried to set multiple objects of a statement"); } else { /* Store URI of this node in our parent as the property object */ raptor_copy_identifier(&element->parent->object, &element->subject); element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } } } /* If this is a node element, generate the rdf:type statement * from this node */ if(state == RAPTOR_STATE_NODE_ELEMENT) raptor_rdfxml_generate_statement(rdf_parser, element->subject.uri, element->subject.id, element->subject.type, element->subject.uri_source, RAPTOR_RDF_type_URI(rdf_xml_parser), NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_URI, 0, raptor_xml_element_get_name(xml_element)->uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, element->object.uri_source, NULL, &element->reified, element); raptor_rdfxml_process_property_attributes(rdf_parser, element, element, NULL); /* for both productions now need some more content or * property elements before can do any more work. 
*/ element->child_state=RAPTOR_STATE_PROPERTYELT; element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; finished=1; break; case RAPTOR_STATE_PARSETYPE_OTHER: /* FALLTHROUGH */ case RAPTOR_STATE_PARSETYPE_LITERAL: raptor_xml_writer_start_element(rdf_xml_parser->xml_writer, xml_element); element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL; element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; finished=1; break; /* Handle all the detail of the various options of property element * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt * * All the attributes must be scanned here to see what additional * property element work is needed. No triples are generated * until the end of this element, until it is clear if the * element was empty. */ case RAPTOR_STATE_MEMBER_PROPERTYELT: case RAPTOR_STATE_PROPERTYELT: if(!raptor_xml_element_get_name(xml_element)->uri) { raptor_parser_error(rdf_parser, "Using property element '%s' without a namespace is forbidden.", raptor_xml_element_get_name(element->parent->xml_element)->local_name); raptor_rdfxml_update_document_locator(rdf_parser); element->state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } /* Handling rdf:li as a property, noting special processing */ if(element_in_rdf_ns && raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_li_URI(rdf_xml_parser))) { state=RAPTOR_STATE_MEMBER_PROPERTYELT; } if(element_in_rdf_ns && (rc = raptor_rdfxml_forbidden_propertyElement_name((const char*)el_name))) { if(rc > 0) { raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a property element.", el_name); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } else raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name); } /* rdf:ID on a property element - reify a statement. 
* Allowed on all property element forms */ if(element->rdf_attr[RDF_ATTR_ID]) { element->reified.id=element->rdf_attr[RDF_ATTR_ID]; element->rdf_attr[RDF_ATTR_ID]=NULL; element->reified.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->reified.id); if(!element->reified.uri) goto oom; element->reified.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; element->reified.uri_source=RAPTOR_URI_SOURCE_GENERATED; if(!raptor_valid_xml_ID(rdf_parser, element->reified.id)) { raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", element->reified.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } if(raptor_rdfxml_record_ID(rdf_parser, element, element->reified.id)) { raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", element->reified.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } } /* rdf:datatype on a property element. * Only allowed for * http://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt */ if (element->rdf_attr[RDF_ATTR_datatype]) { element->object_literal_datatype=raptor_new_uri_relative_to_base_v2(rdf_parser->world, base_uri, (const unsigned char*)element->rdf_attr[RDF_ATTR_datatype]); RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_datatype]); element->rdf_attr[RDF_ATTR_datatype]=NULL; if(!element->object_literal_datatype) goto oom; } if(element->rdf_attr[RDF_ATTR_bagID]) { if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) { if(element->rdf_attr[RDF_ATTR_resource] || element->rdf_attr[RDF_ATTR_parseType]) { raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on property element '%s' with an rdf:resource or rdf:parseType attribute.", el_name); /* prevent this being used later either */ element->rdf_attr[RDF_ATTR_bagID]=NULL; } else { element->bag.id=element->rdf_attr[RDF_ATTR_bagID]; element->rdf_attr[RDF_ATTR_bagID]=NULL; element->bag.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->bag.id); 
if(!element->bag.uri) goto oom; element->bag.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; element->bag.uri_source=RAPTOR_URI_SOURCE_GENERATED; if(!raptor_valid_xml_ID(rdf_parser, element->bag.id)) { raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", element->bag.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } if(raptor_rdfxml_record_ID(rdf_parser, element, element->bag.id)) { raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", element->bag.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated."); } } else { /* bagID forbidden */ raptor_parser_error(rdf_parser, "rdf:bagID is forbidden."); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } } /* if rdf:bagID on property element */ element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT; if (element->rdf_attr[RDF_ATTR_parseType]) { const unsigned char *parse_type=element->rdf_attr[RDF_ATTR_parseType]; int i; int is_parseType_Literal=0; if(raptor_rdfxml_element_has_property_attributes(element)) { raptor_parser_error(rdf_parser, "Property attributes cannot be used with rdf:parseType='%s'", parse_type); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } /* Check for bad combinations of things with parseType */ for(i=0; i<= RDF_ATTR_LAST; i++) if(element->rdf_attr[i] && i != RDF_ATTR_parseType) { raptor_parser_error(rdf_parser, "Attribute '%s' cannot be used with rdf:parseType='%s'", rdf_syntax_terms_info[i].name, parse_type); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } if(!strcmp((char*)parse_type, "Literal")) is_parseType_Literal=1; else if (!strcmp((char*)parse_type, "Resource")) { state=RAPTOR_STATE_PARSETYPE_RESOURCE; element->child_state=RAPTOR_STATE_PROPERTYELT; 
element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; /* create a node for the subject of the contained properties */ element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL); if(!element->subject.id) goto oom; element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS; element->subject.uri_source=RAPTOR_URI_SOURCE_GENERATED; } else if(!strcmp((char*)parse_type, "Collection")) { /* An rdf:parseType="Collection" appears as a single node */ element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; element->child_state=RAPTOR_STATE_PARSETYPE_COLLECTION; element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION; } else { if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES] && !raptor_strcasecmp((char*)parse_type, "daml:collection")) { /* A DAML collection appears as a single node */ element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; element->child_state=RAPTOR_STATE_PARSETYPE_COLLECTION; element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION; } else { if(rdf_parser->features[RAPTOR_FEATURE_WARN_OTHER_PARSETYPES]) { raptor_parser_warning(rdf_parser, "Unknown rdf:parseType value '%s' taken as 'Literal'", parse_type); } is_parseType_Literal=1; } } if(is_parseType_Literal) { /* rdf:parseType="Literal" - explicitly or default * if the parseType value is not recognised */ rdf_xml_parser->xml_content=NULL; rdf_xml_parser->xml_content_length=0; rdf_xml_parser->iostream=raptor_new_iostream_to_string(&rdf_xml_parser->xml_content, &rdf_xml_parser->xml_content_length, raptor_alloc_memory); if(!rdf_xml_parser->iostream) goto oom; rdf_xml_parser->xml_writer=raptor_new_xml_writer_v2(rdf_parser->world, NULL, rdf_xml_parser->iostream, (raptor_simple_message_handler)raptor_parser_simple_error, rdf_parser, 1); if(!rdf_xml_parser->xml_writer) goto oom; raptor_xml_writer_set_feature(rdf_xml_parser->xml_writer, RAPTOR_FEATURE_WRITER_XML_DECLARATION, 0); 
element->child_state=RAPTOR_STATE_PARSETYPE_LITERAL; element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; } } else { /* Can only be the empty property element case * http://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt */ /* The presence of the rdf:resource or rdf:nodeID * attributes is checked at element close time */ /* * Assign reified URI here so we don't reify property attributes * using this id */ if(element->reified.id && !element->reified.uri) { element->reified.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->reified.id); if(!element->reified.uri) goto oom; element->reified.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; element->reified.uri_source=RAPTOR_URI_SOURCE_GENERATED; } if(element->rdf_attr[RDF_ATTR_resource] || element->rdf_attr[RDF_ATTR_nodeID]) { /* Done - wait for end of this element to end in order to * check the element was empty as expected */ element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } else { /* Otherwise process content in obj (value) state */ element->child_state=RAPTOR_STATE_NODE_ELEMENT_LIST; element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT; } } finished=1; break; case RAPTOR_STATE_INVALID: default: raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_start_element_grammar: Unexpected parser state %d - %s", state, raptor_rdfxml_state_as_string(state)); finished=1; } /* end switch */ if(state != element->state) { element->state=state; #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state)); #endif } } /* end while */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state)); #endif return; oom: raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping"); element->state=RAPTOR_STATE_SKIPPING; } static void raptor_rdfxml_end_element_grammar(raptor_parser *rdf_parser, 
raptor_rdfxml_element *element) { raptor_state state; int finished; raptor_xml_element* xml_element=element->xml_element; const unsigned char *el_name=raptor_xml_element_get_name(xml_element)->local_name; int element_in_rdf_ns=(raptor_xml_element_get_name(xml_element)->nspace && raptor_xml_element_get_name(xml_element)->nspace->is_rdf_ms); raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; state=element->state; #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state)); #endif finished= 0; while(!finished) { switch(state) { case RAPTOR_STATE_SKIPPING: finished=1; break; case RAPTOR_STATE_UNKNOWN: finished=1; break; case RAPTOR_STATE_NODE_ELEMENT_LIST: if(element_in_rdf_ns && raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_RDF_URI(rdf_xml_parser))) { /* end of RDF - boo hoo */ state=RAPTOR_STATE_UNKNOWN; finished=1; break; } /* When scanning, another element ending is outside the RDF * world so this can happen without further work */ if(rdf_parser->features[RAPTOR_FEATURE_SCANNING]) { state=RAPTOR_STATE_UNKNOWN; finished=1; break; } /* otherwise found some junk after RDF content in an RDF-only * document (probably never get here since this would be * a mismatched XML tag and cause an error earlier) */ raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_warning(rdf_parser, "Element '%s' ended, expected end of RDF element", el_name); state=RAPTOR_STATE_UNKNOWN; finished=1; break; case RAPTOR_STATE_DESCRIPTION: case RAPTOR_STATE_NODE_ELEMENT: case RAPTOR_STATE_PARSETYPE_RESOURCE: /* If there is a parent element containing this element and * the parent isn't a description, has an identifier, * create the statement between this node using parent property * (Need to check for identifier so that top-level typed nodes * don't get connect to parent element) */ if(state == RAPTOR_STATE_NODE_ELEMENT && element->parent && 
(element->parent->subject.uri || element->parent->subject.id)) raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject.uri, element->parent->subject.id, element->parent->subject.type, element->parent->subject.uri_source, raptor_xml_element_get_name(element->parent->xml_element)->uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_ELEMENT, 0, element->subject.uri, element->subject.id, element->subject.type, element->subject.uri_source, NULL, NULL, element); else if(state == RAPTOR_STATE_PARSETYPE_RESOURCE && element->parent && (element->parent->subject.uri || element->parent->subject.id)) { /* Handle rdf:li as the rdf:parseType="resource" property */ if(element_in_rdf_ns && raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_li_URI(rdf_xml_parser))) { element->parent->last_ordinal++; raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject.uri, element->parent->subject.id, element->parent->subject.type, element->parent->subject.uri_source, NULL, NULL, RAPTOR_IDENTIFIER_TYPE_ORDINAL, RAPTOR_URI_SOURCE_NOT_URI, element->parent->last_ordinal, element->subject.uri, element->subject.id, element->subject.type, element->subject.uri_source, NULL, &element->reified, element->parent); } else { raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject.uri, element->parent->subject.id, element->parent->subject.type, element->parent->subject.uri_source, raptor_xml_element_get_name(xml_element)->uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_ELEMENT, 0, element->subject.uri, element->subject.id, element->subject.type, element->subject.uri_source, NULL, &element->reified, element->parent); } } finished=1; break; case RAPTOR_STATE_PARSETYPE_COLLECTION: finished=1; break; case RAPTOR_STATE_PARSETYPE_OTHER: /* FALLTHROUGH */ case RAPTOR_STATE_PARSETYPE_LITERAL: element->parent->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; 
raptor_xml_writer_end_element(rdf_xml_parser->xml_writer, xml_element); finished=1; break; case RAPTOR_STATE_PROPERTYELT: case RAPTOR_STATE_MEMBER_PROPERTYELT: /* A property element * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt * * Literal content part is handled here. * The element content is handled in the internal states * Empty content is checked here. */ if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) { if(xml_element->content_cdata_seen) element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; else if (xml_element->content_element_seen) element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; else { /* Empty Literal */ element->object.type= RAPTOR_IDENTIFIER_TYPE_LITERAL; element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; } } /* Handle terminating a rdf:parseType="Collection" list */ if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION || element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { raptor_uri* nil_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_nil_URI(rdf_xml_parser) : RAPTOR_RDF_nil_URI(rdf_xml_parser); if (!element->tail_id) { /* If No List: set object of statement to rdf:nil */ element->object.uri= raptor_uri_copy_v2(rdf_parser->world, nil_uri); element->object.id= NULL; element->object.type= RAPTOR_IDENTIFIER_TYPE_RESOURCE; element->object.uri_source= RAPTOR_URI_SOURCE_URI; } else { raptor_uri* rest_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? 
RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_xml_parser); /* terminate the list */ raptor_rdfxml_generate_statement(rdf_parser, NULL, element->tail_id, RAPTOR_IDENTIFIER_TYPE_ANONYMOUS, RAPTOR_URI_SOURCE_ID, rest_uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_URI, 0, nil_uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_URI, NULL, NULL, NULL); } } /* end rdf:parseType="Collection" termination */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type); #endif switch(element->content_type) { case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE: if(raptor_rdfxml_element_has_property_attributes(element) && element->child_state == RAPTOR_STATE_DESCRIPTION) { raptor_parser_error(rdf_parser, "Property element '%s' has both property attributes and a node element content", el_name); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } if(element->object.type == RAPTOR_IDENTIFIER_TYPE_UNKNOWN) { if(element->rdf_attr[RDF_ATTR_resource]) { element->object.uri=raptor_new_uri_relative_to_base_v2(rdf_parser->world, raptor_rdfxml_inscope_base_uri(rdf_parser), (const unsigned char*)element->rdf_attr[RDF_ATTR_resource]); RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_resource]); element->rdf_attr[RDF_ATTR_resource]=NULL; if(!element->object.uri) goto oom; element->object.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; element->object.uri_source=RAPTOR_URI_SOURCE_URI; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } else if(element->rdf_attr[RDF_ATTR_nodeID]) { element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, (unsigned char*)element->rdf_attr[RDF_ATTR_nodeID]); element->rdf_attr[RDF_ATTR_nodeID]=NULL; if(!element->object.id) goto oom; element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS; 
element->object.uri_source=RAPTOR_URI_SOURCE_BLANK_ID; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; if(!raptor_valid_xml_ID(rdf_parser, element->object.id)) { raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", element->object.id); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } } else { element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL); if(!element->object.id) goto oom; element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS; element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } raptor_rdfxml_process_property_attributes(rdf_parser, element, element->parent, &element->object); } /* We know object is a resource, so delete any unsignficant * whitespace so that FALLTHROUGH code below finds the object. */ if(xml_element->content_cdata_length) { raptor_free_stringbuffer(xml_element->content_cdata_sb); xml_element->content_cdata_sb=NULL; xml_element->content_cdata_length=0; } /* FALLTHROUGH */ case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL: if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) { if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) { /* Only an empty literal can have a rdf:bagID */ if(element->bag.uri || element->bag.id) { if(xml_element->content_cdata_length > 0) { raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on a literal property element '%s'.", el_name); /* prevent this being used later either */ element->rdf_attr[RDF_ATTR_bagID]=NULL; } else raptor_rdfxml_generate_statement(rdf_parser, element->bag.uri, element->bag.id, element->bag.type, element->bag.uri_source, RAPTOR_RDF_type_URI(rdf_xml_parser), NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_URI, 0, RAPTOR_RDF_Bag_URI(rdf_xml_parser), NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_NOT_URI, NULL, NULL, NULL); } } /* if rdf:bagID */ /* If there is 
empty literal content with properties * generate a node to hang properties off */ if(raptor_rdfxml_element_has_property_attributes(element) && xml_element->content_cdata_length > 0) { raptor_parser_error(rdf_parser, "Literal property element '%s' has property attributes", el_name); state=RAPTOR_STATE_SKIPPING; element->child_state=RAPTOR_STATE_SKIPPING; finished=1; break; } if(element->object.type == RAPTOR_IDENTIFIER_TYPE_LITERAL && raptor_rdfxml_element_has_property_attributes(element) && !element->object.uri) { element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL); if(!element->object.id) goto oom; element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS; element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } raptor_rdfxml_process_property_attributes(rdf_parser, element, element, &element->object); } /* just be friendly to older compilers and don't declare * variables in the middle of a block */ if(1) { raptor_uri *predicate_uri=NULL; raptor_identifier_type predicate_type; int predicate_ordinal=0; raptor_uri *object_uri; raptor_identifier_type object_type; raptor_uri *literal_datatype=NULL; const unsigned char* empty_literal=(const unsigned char*)""; if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) { element->parent->last_ordinal++; predicate_ordinal=element->parent->last_ordinal; predicate_type=RAPTOR_IDENTIFIER_TYPE_ORDINAL; } else { predicate_uri=raptor_xml_element_get_name(xml_element)->uri; predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE; } if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) { unsigned char* literal; object_type=RAPTOR_IDENTIFIER_TYPE_LITERAL; literal=raptor_stringbuffer_as_string(xml_element->content_cdata_sb); literal_datatype=element->object_literal_datatype; if(!literal_datatype && literal && !raptor_utf8_is_nfc(literal, xml_element->content_cdata_length)) { const char *message="Property element '%s' has a 
string not in Unicode Normal Form C: %s"; raptor_rdfxml_update_document_locator(rdf_parser); if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL]) raptor_parser_error(rdf_parser, message, el_name, literal); else raptor_parser_warning(rdf_parser, message, el_name, literal); } if(!literal) /* empty literal */ literal=(unsigned char*)empty_literal; object_uri=(raptor_uri*)literal; } else { object_type=element->object.type; object_uri=element->object.uri; } raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject.uri, element->parent->subject.id, element->parent->subject.type, RAPTOR_URI_SOURCE_ELEMENT, predicate_uri, NULL, predicate_type, RAPTOR_URI_SOURCE_NOT_URI, predicate_ordinal, object_uri, element->object.id, object_type, element->object.uri_source, literal_datatype, &element->reified, element->parent); } break; case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL: { unsigned char *buffer; unsigned int length; if(rdf_xml_parser->xml_writer) { raptor_xml_writer_flush(rdf_xml_parser->xml_writer); raptor_free_iostream(rdf_xml_parser->iostream); rdf_xml_parser->iostream=NULL; buffer=(unsigned char*)rdf_xml_parser->xml_content; length=rdf_xml_parser->xml_content_length; } else { buffer=raptor_stringbuffer_as_string(xml_element->content_cdata_sb); length=xml_element->content_cdata_length; } if(!raptor_utf8_is_nfc(buffer, length)) { const char *message="Property element '%s' has XML literal content not in Unicode Normal Form C: %s"; raptor_rdfxml_update_document_locator(rdf_parser); if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL]) raptor_parser_error(rdf_parser, message, el_name, buffer); else raptor_parser_warning(rdf_parser, message, el_name, buffer); } if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) { element->parent->last_ordinal++; raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject.uri, element->parent->subject.id, element->parent->subject.type, 
element->parent->subject.uri_source, NULL, NULL, RAPTOR_IDENTIFIER_TYPE_ORDINAL, RAPTOR_URI_SOURCE_NOT_URI, element->parent->last_ordinal, (raptor_uri*)buffer, NULL, RAPTOR_IDENTIFIER_TYPE_LITERAL, RAPTOR_URI_SOURCE_NOT_URI, RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser), &element->reified, element->parent); } else { raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject.uri, element->parent->subject.id, element->parent->subject.type, element->parent->subject.uri_source, raptor_xml_element_get_name(xml_element)->uri, NULL, RAPTOR_IDENTIFIER_TYPE_RESOURCE, RAPTOR_URI_SOURCE_ELEMENT, 0, (raptor_uri*)buffer, NULL, RAPTOR_IDENTIFIER_TYPE_LITERAL, RAPTOR_URI_SOURCE_NOT_URI, RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser), &element->reified, element->parent); } /* Finish the xml writer iostream for parseType="Literal" */ if(rdf_xml_parser->xml_writer) { raptor_free_xml_writer(rdf_xml_parser->xml_writer); RAPTOR_FREE(cstring, rdf_xml_parser->xml_content); rdf_xml_parser->xml_content=NULL; rdf_xml_parser->xml_content_length=0; } } break; case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST: default: raptor_parser_fatal_error(rdf_parser, "%s: Internal error in state RAPTOR_STATE_PROPERTYELT - got unexpected content type %s (%d)", __func__, raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type); } /* end switch */ finished=1; break; case RAPTOR_STATE_INVALID: default: raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_end_element_grammar: Unexpected parser state %d - %s", state, raptor_rdfxml_state_as_string(state)); finished=1; } /* end switch */ if(state != element->state) { element->state=state; #ifdef RAPTOR_DEBUG_VERBOSE 
RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state)); #endif } } /* end while */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state)); #endif return; oom: raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping"); element->state=RAPTOR_STATE_SKIPPING; } static void raptor_rdfxml_cdata_grammar(raptor_parser *rdf_parser, const unsigned char *s, int len, int is_cdata) { raptor_rdfxml_parser* rdf_xml_parser; raptor_rdfxml_element* element; raptor_xml_element* xml_element; raptor_state state; int all_whitespace=1; int i; rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; if(rdf_parser->failed) return; #ifdef RAPTOR_DEBUG_CDATA RAPTOR_DEBUG2("Adding characters (is_cdata=%d): '", is_cdata); (void)fwrite(s, 1, len, stderr); fprintf(stderr, "' (%d bytes)\n", len); #endif for(i=0; icurrent_element; /* this file is very broke - probably not XML, whatever */ if(!element) return; xml_element=element->xml_element; raptor_rdfxml_update_document_locator(rdf_parser); /* cdata never changes the parser state * and the containing element state always determines what to do. 
* Use the child_state first if there is one, since that applies */ state=element->child_state; #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Working in state %s\n", raptor_rdfxml_state_as_string(state)); #endif #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type); #endif if(state == RAPTOR_STATE_SKIPPING) return; if(state == RAPTOR_STATE_UNKNOWN) { /* Ignore all cdata if still looking for RDF */ if(rdf_parser->features[RAPTOR_FEATURE_SCANNING]) return; /* Ignore all whitespace cdata before first element */ if(all_whitespace) return; /* This probably will never happen since that would make the * XML not be well-formed */ raptor_parser_warning(rdf_parser, "Character data before RDF element."); } if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES) { /* If found non-whitespace content, move to literal content */ if(!all_whitespace) element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; } if(!rdf_content_type_info[element->child_content_type].whitespace_significant) { /* Whitespace is ignored except for literal or preserved content types */ if(all_whitespace) { #ifdef RAPTOR_DEBUG_CDATA RAPTOR_DEBUG2("Ignoring whitespace cdata inside element '%s'\n", raptor_xml_element_get_name(element->parent->xml_element)->local_name); #endif return; } if(xml_element->content_cdata_seen && xml_element->content_element_seen) { /* Uh oh - mixed content, this element has elements too */ raptor_parser_warning(rdf_parser, "element '%s' has mixed content.", raptor_xml_element_get_name(element->parent->xml_element)->local_name); } } if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) { element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Content type changed to %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), 
element->content_type); #endif } if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL) raptor_xml_writer_cdata_counted(rdf_xml_parser->xml_writer, s, len); else { raptor_stringbuffer_append_counted_string(xml_element->content_cdata_sb, s, len, 1); element->content_cdata_all_whitespace &= all_whitespace; /* adjust stored length */ xml_element->content_cdata_length += len; } #ifdef RAPTOR_DEBUG_CDATA RAPTOR_DEBUG3("Content cdata now: %d bytes\n", xml_element->content_cdata_length); #endif #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state)); #endif } /** * raptor_rdfxml_inscope_base_uri: * @rdf_parser: Raptor parser object * * Return the in-scope base URI. * * Looks for the innermost xml:base on an element or document URI * * Return value: The URI string value or NULL on failure. **/ static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser) { raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; raptor_uri* base_uri; base_uri=raptor_sax2_inscope_base_uri(rdf_xml_parser->sax2); if(!base_uri) base_uri=rdf_parser->base_uri; return base_uri; } /** * raptor_rdfxml_record_ID: * @rdf_parser: Raptor parser object * @element: Current element * @id: ID string * * Record an rdf:ID / rdf:bagID value (with xml base) and check it hasn't been seen already. * * Record and check the ID values, if they have been seen already. * per in-scope-base URI. 
* * Return value: non-zero if already seen, or failure **/ static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id) { raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; raptor_uri* base_uri=raptor_rdfxml_inscope_base_uri(rdf_parser); size_t id_len=strlen((const char*)id); int rc; if(!rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID]) return 0; rc=raptor_id_set_add(rdf_xml_parser->id_set, base_uri, id, id_len); return (rc != 0); } static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser) { raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context; raptor_sax2_update_document_locator(rdf_xml_parser->sax2, &rdf_parser->locator); } static void raptor_rdfxml_parse_finish_factory(raptor_parser_factory* factory) { } static int raptor_rdfxml_parser_register_factory(raptor_parser_factory *factory) { int rc=0; factory->context_length = sizeof(raptor_rdfxml_parser); factory->need_base_uri = 1; factory->init = raptor_rdfxml_parse_init; factory->terminate = raptor_rdfxml_parse_terminate; factory->start = raptor_rdfxml_parse_start; factory->chunk = raptor_rdfxml_parse_chunk; factory->finish_factory = raptor_rdfxml_parse_finish_factory; factory->recognise_syntax = raptor_rdfxml_parse_recognise_syntax; rc+= raptor_parser_factory_add_alias(factory, "raptor") != 0; rc+= raptor_parser_factory_add_uri(factory, (const unsigned char*)"http://www.w3.org/TR/rdf-syntax-grammar") != 0; rc+= raptor_parser_factory_add_mime_type(factory, "application/rdf+xml", 10) != 0; rc+= raptor_parser_factory_add_mime_type(factory, "text/rdf", 6) != 0; return rc; } int raptor_init_parser_rdfxml(raptor_world* world) { return !raptor_parser_register_factory(world, "rdfxml", "RDF/XML", &raptor_rdfxml_parser_register_factory); } #if RAPTOR_DEBUG > 1 void raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser* rdf_xml_parser, FILE *stream) { fputs("rdf:ID set ", stream); 
raptor_id_set_stats_print(rdf_xml_parser->id_set, stream); } #endif