/**
* Copyright 2008 Digital Bazaar, Inc.
*
* This file is part of librdfa.
*
* librdfa is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* librdfa is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with librdfa. If not, see <http://www.gnu.org/licenses/>.
*
* The librdfa library is the Fastest RDFa Parser in the Universe. It is
* a stream parser, meaning that it takes XML data as input and spits
* out RDF triples as it comes across them in the stream. Due to this
* processing approach, librdfa has a very, very small memory footprint.
* It is also very fast and can operate on hundreds of gigabytes of XML
* data without breaking a sweat.
*
* Usage:
*
* rdfacontext* context = rdfa_create_context(base_uri);
* context->callback_data = your_user_data;
* rdfa_set_triple_handler(context, triple_function);
* rdfa_set_buffer_filler(context, buffer_filler_function);
* rdfa_parse(context);
* rdfa_free_context(context);
*/
#ifndef _LIBRDFA_RDFA_H_
#define _LIBRDFA_RDFA_H_
#include
// Activate the stupid Windows DLL exporting mechanism if we're building for Windows
#ifdef WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif
#ifdef LIBRDFA_IN_RAPTOR
#ifdef HAVE_CONFIG_H
#include
#endif
#ifdef WIN32
#include
#endif
#include "raptor.h"
#include "raptor_internal.h"
#else
#include
#endif
#ifdef __cplusplus
extern "C"
{
#endif
/* NOTE(review): presumably a compile-time switch for debug output
 * (0 = off) -- confirm against the implementation files. */
#define DEBUG 0
/*
 * Parser status codes. rdfa_parse() is documented in this header as
 * returning RDFA_PARSE_SUCCESS if everything went well,
 * RDFA_PARSE_FAILED on a fatal error and RDFA_PARSE_WARNING on a
 * non-fatal error.
 * NOTE(review): RDFA_PARSE_UNKNOWN is not described anywhere in this
 * header -- presumably an "undetermined" state; confirm.
 */
#define RDFA_PARSE_WARNING -2
#define RDFA_PARSE_FAILED -1
#define RDFA_PARSE_UNKNOWN 0
#define RDFA_PARSE_SUCCESS 1
/*
 * NOTE(review): presumably the capacities of the URI mapping table and
 * of the incomplete-triple lists -- confirm against the implementation.
 */
#define MAX_URI_MAPPINGS 512
#define MAX_INCOMPLETE_TRIPLES 1024
/* NOTE(review): presumably the key used for the default (unprefixed)
 * XML namespace mapping -- confirm. */
#define XMLNS_DEFAULT_MAPPING "XMLNS_DEFAULT"
/* Whitespace character set: space, tab, newline, vertical tab, form
 * feed and carriage return. */
#define RDFA_WHITESPACE " \t\n\v\f\r"
/**
 * Classifies the content of a triple's object value.
 *
 * The enumerators are numbered consecutively starting at zero; the
 * values are spelled out so the numbering is visible at a glance.
 */
typedef enum
{
   RDF_TYPE_NAMESPACE_PREFIX = 0,
   RDF_TYPE_IRI = 1,
   RDF_TYPE_PLAIN_LITERAL = 2,
   RDF_TYPE_XML_LITERAL = 3,
   RDF_TYPE_TYPED_LITERAL = 4,
   RDF_TYPE_UNKNOWN = 5
} rdfresource_t;
/**
 * A single RDF statement produced from RDFa markup. A triple carries,
 * at the very least, a subject, a predicate and an object, and is the
 * smallest complete statement that can be made in RDF.
 */
struct rdftriple
{
   char *subject;
   char *predicate;
   char *object;
   rdfresource_t object_type; /* kind of content held in object */
   char *datatype;
   char *language;
};
typedef struct rdftriple rdftriple;
/**
 * Signature of a callback that is capable of handling triples. The
 * triple handed to the callback must be freed once the application is
 * done with it.
 *
 * NOTE(review): the second argument is presumably the callback_data
 * pointer stored in the rdfacontext -- confirm against the parser.
 */
typedef void (*triple_handler_fp)(rdftriple *triple, void *callback_data);
/**
 * Signature of the buffer filler callback that is installed with
 * rdfa_set_buffer_filler().
 *
 * NOTE(review): the parameters are presumably the destination buffer,
 * its capacity in bytes and the context's callback_data pointer, and
 * the return value presumably the number of bytes written -- confirm
 * against the parser implementation.
 */
typedef size_t (*buffer_filler_fp)(char *buffer, size_t buffer_length, void *callback_data);
/**
 * One entry of an RDFa list: a set of flags together with the data
 * for that list member.
 */
struct rdfalistitem
{
   unsigned char flags;
   void *data;
};
typedef struct rdfalistitem rdfalistitem;
/**
 * A growable list of rdfalistitem entries, used to store multiple text
 * strings that have a set of attributes associated with them, such as
 * lists of CURIEs or lists of incomplete triples. The structure grows
 * with use, but cannot be shrunk.
 */
struct rdfalist
{
   rdfalistitem **items;
   size_t num_items;
   size_t max_items;
};
typedef struct rdfalist rdfalist;
/**
 * The RDFa parser context. It keeps track of the state of the current
 * RDFa parser: things such as the default namespace, CURIE mappings
 * and other context-specific data.
 *
 * The member order is part of the binary layout and must not change.
 */
struct rdfacontext
{
   char *base;
   char *parent_subject;
   char *parent_object;
#ifndef LIBRDFA_IN_RAPTOR
   char **uri_mappings;
#endif
   rdfalist *incomplete_triples;
   rdfalist *local_incomplete_triples;
   char *language;

   /* application callbacks */
   triple_handler_fp triple_callback;
   buffer_filler_fp buffer_filler_callback;

   unsigned char recurse;
   unsigned char skip_element;
   char *new_subject;
   char *current_object_resource;
   char *content;
   char *datatype;
   rdfalist *property;
   char *plain_literal;
   size_t plain_literal_size;
   char *xml_literal;
   size_t xml_literal_size;

   /* user data; the usage example in this header assigns it directly */
   void *callback_data;

   /* parse state */
   size_t bnode_count;
   char *underscore_colon_bnode_name;
   unsigned char xml_literal_namespaces_defined;
   unsigned char xml_literal_xml_lang_defined;
   size_t wb_allocated;
   char *working_buffer;
   size_t wb_offset;
#ifdef LIBRDFA_IN_RAPTOR
   /* a pointer (in every context) to the error_handlers structure
    * held in the raptor_parser object */
   raptor_error_handlers *error_handlers;
   raptor_uri *base_uri;
   raptor_sax2 *sax2;
   raptor_namespace_handler namespace_handler;
   void *namespace_handler_user_data;
#else
   XML_Parser parser;
#endif
   int done;
   rdfalist *context_stack;
   size_t wb_preread;
   int preread;
};
typedef struct rdfacontext rdfacontext;
/**
 * Creates an initial context for RDFa.
 *
 * @param base The base URI that should be used for the parser.
 *
 * @return a pointer to the base RDFa context, or NULL if memory
 *         allocation failed. rdfa_free_context() frees all memory
 *         associated with a context.
 */
DLLEXPORT rdfacontext* rdfa_create_context(const char* base);
/**
 * Sets the triple handler for the application.
 *
 * @param context the base rdfa context for the application.
 * @param th the triple handler function; a triple_handler_fp callback
 *           that is capable of handling triples.
 */
DLLEXPORT void rdfa_set_triple_handler(rdfacontext* context, triple_handler_fp th);
/**
 * Sets the buffer filler for the application.
 *
 * @param context the base rdfa context for the application.
 * @param bf the buffer filler function (a buffer_filler_fp callback).
 */
DLLEXPORT void rdfa_set_buffer_filler(rdfacontext* context, buffer_filler_fp bf);
/**
 * Starts processing given the base rdfa context. In the usage example
 * at the top of this header the triple handler and the buffer filler
 * are set on the context before this function is called.
 *
 * @param context the base rdfa context.
 *
 * @return RDFA_PARSE_SUCCESS if everything went well. RDFA_PARSE_FAILED
 *         if there was a fatal error and RDFA_PARSE_WARNING if there
 *         was a non-fatal error.
 */
DLLEXPORT int rdfa_parse(rdfacontext* context);
/**
 * NOTE(review): undocumented in the original header. Presumably begins
 * a chunk-wise parse that is continued with rdfa_parse_chunk() and
 * finished with rdfa_parse_end(), returning one of the RDFA_PARSE_*
 * codes -- confirm against the implementation.
 *
 * @param context the rdfa context.
 */
DLLEXPORT int rdfa_parse_start(rdfacontext* context);
/**
 * NOTE(review): undocumented in the original header. Presumably feeds
 * one chunk of input to the parser: `data` the chunk, `wblen` its
 * length in bytes, `done` non-zero for the final chunk, with one of
 * the RDFA_PARSE_* codes as the result -- confirm against the
 * implementation.
 *
 * @param context the rdfa context.
 */
DLLEXPORT int rdfa_parse_chunk(rdfacontext* context, char* data, size_t wblen, int done);
/**
 * NOTE(review): undocumented in the original header. Presumably
 * finishes a parse that was begun with rdfa_parse_start() -- confirm
 * against the implementation.
 *
 * @param context the rdfa context.
 */
DLLEXPORT void rdfa_parse_end(rdfacontext* context);
/**
 * NOTE(review): undocumented in the original header. Presumably
 * initialises the members of an existing context; the exact contract
 * is not visible here -- confirm against the implementation.
 *
 * @param context the rdfa context.
 */
DLLEXPORT void rdfa_init_context(rdfacontext* context);
/**
 * NOTE(review): undocumented in the original header. Presumably
 * derives the base part of the given IRI; whether the returned string
 * is newly allocated (and must be freed by the caller) is not visible
 * here -- confirm against the implementation.
 *
 * @param iri the IRI to examine.
 */
DLLEXPORT char* rdfa_iri_get_base(const char* iri);
/**
 * Destroys the given rdfa context by freeing all memory associated
 * with the context.
 *
 * @param context the rdfa context to destroy.
 */
DLLEXPORT void rdfa_free_context(rdfacontext* context);
#ifdef __cplusplus
}
#endif
#endif