Parser

Parser — RDF parsers - from a syntax to RDF triples

Synopsis

typedef             raptor_parser;
raptor_parser*      raptor_new_parser                   (const char *name);
raptor_parser*      raptor_new_parser_for_content       (raptor_uri *uri,
                                                         const char *mime_type,
                                                         unsigned char *buffer,
                                                         size_t len,
                                                         unsigned char *identifier);
int                 raptor_start_parse                  (raptor_parser *rdf_parser,
                                                         raptor_uri *uri);
void                raptor_free_parser                  (raptor_parser *parser);
void                raptor_set_fatal_error_handler      (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_message_handler handler);
void                raptor_set_error_handler            (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_message_handler handler);
void                raptor_set_warning_handler          (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_message_handler handler);
void                raptor_set_statement_handler        (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_statement_handler handler);
void                raptor_set_generate_id_handler      (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_generate_id_handler handler);
void                (*raptor_graph_handler)             (void *user_data,
                                                         raptor_uri *graph);
void                raptor_set_graph_handler            (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_graph_handler handler);
void                (*raptor_namespace_handler)         (void *user_data,
                                                         raptor_namespace *nspace);
void                raptor_set_namespace_handler        (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_namespace_handler handler);
raptor_locator*     raptor_get_locator                  (raptor_parser *rdf_parser);
void                raptor_set_default_generate_id_parameters
                                                        (raptor_parser *rdf_parser,
                                                         char *prefix,
                                                         int base);
int                 raptor_parse_chunk                  (raptor_parser *rdf_parser,
                                                         unsigned char *buffer,
                                                         size_t len,
                                                         int is_end);
int                 raptor_parse_file_stream            (raptor_parser *rdf_parser,
                                                         FILE *stream,
                                                         const char *filename,
                                                         raptor_uri *base_uri);
int                 raptor_parse_file                   (raptor_parser *rdf_parser,
                                                         raptor_uri *uri,
                                                         raptor_uri *base_uri);
int                 raptor_parse_uri                    (raptor_parser *rdf_parser,
                                                         raptor_uri *uri,
                                                         raptor_uri *base_uri);
int                 raptor_parse_uri_with_connection    (raptor_parser *rdf_parser,
                                                         raptor_uri *uri,
                                                         raptor_uri *base_uri,
                                                         void *connection);
void                raptor_parse_abort                  (raptor_parser *rdf_parser);
const char*         raptor_get_name                     (raptor_parser *rdf_parser);
const char*         raptor_get_label                    (raptor_parser *rdf_parser);
const char*         raptor_get_mime_type                (raptor_parser *rdf_parser);
int                 raptor_get_need_base_uri            (raptor_parser *rdf_parser);
int                 raptor_set_feature                  (raptor_parser *parser,
                                                         raptor_feature feature,
                                                         int value);
int                 raptor_parser_set_feature_string    (raptor_parser *parser,
                                                         raptor_feature feature,
                                                         unsigned char *value);
int                 raptor_get_feature                  (raptor_parser *parser,
                                                         raptor_feature feature);
const unsigned char* raptor_parser_get_feature_string   (raptor_parser *parser,
                                                         raptor_feature feature);
void                raptor_set_parser_strict            (raptor_parser *rdf_parser,
                                                         int is_strict);
const char*         raptor_parser_get_accept_header     (raptor_parser *rdf_parser);
void                raptor_parser_set_uri_filter        (raptor_parser *parser,
                                                         raptor_uri_filter_func filter,
                                                         void *user_data);
unsigned char*      raptor_parser_generate_id           (raptor_parser *rdf_parser,
                                                         raptor_genid_type type);
raptor_world*       raptor_parser_get_world             (raptor_parser *rdf_parser);

Description

The parsing class that allows creating a parser for reading from a particular syntax (or can guess and use contextual information) that will on demand generate RDF triples to a handler function, as chunks of syntax data are passed into the parser. Parsing can be done from strings in memory, files or from URIs on the web.

There are also methods to deal with handling errors, warnings and returned triples as well as setting options (features) that can adjust how parsing is performed.

Details

raptor_parser

typedef struct raptor_parser_s raptor_parser;

Raptor Parser class


raptor_new_parser ()

raptor_parser*      raptor_new_parser                   (const char *name);

Constructor - create a new raptor_parser object.

raptor_init() MUST have been called before calling this function. Use raptor_new_parser_v2() if using raptor_world APIs.

name :

the parser name

Returns :

a new raptor_parser object or NULL on failure

raptor_new_parser_for_content ()

raptor_parser*      raptor_new_parser_for_content       (raptor_uri *uri,
                                                         const char *mime_type,
                                                         unsigned char *buffer,
                                                         size_t len,
                                                         unsigned char *identifier);

Constructor - create a new raptor_parser.

Uses raptor_guess_parser_name() to find a parser by scoring recognition of the syntax by a block of characters, the content identifier or a mime type. The content identifier is typically a filename or URI or some other identifier.

raptor_init() MUST have been called before calling this function. Use raptor_new_parser_for_content_v2() if using raptor_world APIs.

uri :

URI identifying the syntax (or NULL)

mime_type :

mime type identifying the content (or NULL)

buffer :

buffer of content to guess (or NULL)

len :

length of buffer

identifier :

identifier of content (or NULL)

Returns :

a new raptor_parser object or NULL on failure

raptor_start_parse ()

int                 raptor_start_parse                  (raptor_parser *rdf_parser,
                                                         raptor_uri *uri);

Start a parse of content with base URI.

Parsers that need a base URI can be tested with raptor_get_need_base_uri().

rdf_parser :

RDF parser

uri :

base URI or may be NULL if no base URI is required

Returns :

non-0 on failure, <0 if a required base URI was missing

raptor_free_parser ()

void                raptor_free_parser                  (raptor_parser *parser);

Destructor - destroy a raptor_parser object.

parser :

raptor_parser object

raptor_set_fatal_error_handler ()

void                raptor_set_fatal_error_handler      (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_message_handler handler);

Set the parser fatal error handling function.

The function will receive callbacks when the parser fails.

parser :

the parser

user_data :

user data to pass to function

handler :

pointer to the function

raptor_set_error_handler ()

void                raptor_set_error_handler            (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_message_handler handler);

Set the parser error handling function.

The function will receive callbacks when the parser fails.

parser :

the parser

user_data :

user data to pass to function

handler :

pointer to the function

raptor_set_warning_handler ()

void                raptor_set_warning_handler          (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_message_handler handler);

Set the parser warning handling function.

The function will receive callbacks when the parser gives a warning.

parser :

the parser

user_data :

user data to pass to function

handler :

pointer to the function

raptor_set_statement_handler ()

void                raptor_set_statement_handler        (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_statement_handler handler);

Set the statement handler function for the parser.

parser :

raptor_parser parser object

user_data :

user data pointer for callback

handler :

new statement callback function

raptor_set_generate_id_handler ()

void                raptor_set_generate_id_handler      (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_generate_id_handler handler);

Set the generate ID handler function for the parser.

Sets the function to generate IDs for the parser. The handler is called with the user_data parameter and an ID type of either RAPTOR_GENID_TYPE_BNODEID or RAPTOR_GENID_TYPE_BAGID (latter is deprecated).

The final argument of the callback method is user_bnodeid, the value of the rdf:nodeID attribute that the user provided if any (or NULL). It can either be returned directly as the generated value when present or modified. The passed in value must be free()d if it is not used.

If handler is NULL, the default method is used

parser :

raptor_parser parser object

user_data :

user data pointer for callback

handler :

generate ID callback function

raptor_graph_handler ()

void                (*raptor_graph_handler)             (void *user_data,
                                                         raptor_uri *graph);

Named graph reporting handler function. For historical reasons the named graph API is separate from the statement handler. A graph is reported after all its statements.

user_data :

user data

graph :

graph to report, NULL for the default graph

raptor_set_graph_handler ()

void                raptor_set_graph_handler            (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_graph_handler handler);

Set the graph handler function for the parser.

parser :

raptor_parser parser object

user_data :

user data pointer for callback

handler :

new graph callback function

raptor_namespace_handler ()

void                (*raptor_namespace_handler)         (void *user_data,
                                                         raptor_namespace *nspace);

XML Namespace declaration reporting handler set by raptor_set_namespace_handler().

user_data :

user data

nspace :

raptor_namespace declared

raptor_set_namespace_handler ()

void                raptor_set_namespace_handler        (raptor_parser *parser,
                                                         void *user_data,
                                                         raptor_namespace_handler handler);

Set the namespace handler function for the parser.

When a prefix/namespace is seen in a parser, call the given handler with a raptor_namespace object describing the prefix string and the raptor_uri namespace URI. Either can be NULL for the default prefix or default namespace.

The handler function does not deal with duplicates so any namespace may be declared multiple times.

parser :

raptor_parser parser object

user_data :

user data pointer for callback

handler :

new namespace callback function

raptor_get_locator ()

raptor_locator*     raptor_get_locator                  (raptor_parser *rdf_parser);

Get the current raptor locator object.

rdf_parser :

raptor parser

Returns :

raptor locator

raptor_set_default_generate_id_parameters ()

void                raptor_set_default_generate_id_parameters
                                                        (raptor_parser *rdf_parser,
                                                         char *prefix,
                                                         int base);

Set default ID generation parameters.

Sets the parameters for the default algorithm used to generate IDs. The default algorithm uses both prefix and base to generate a new identifier. The exact identifier generated is not guaranteed to be a strict concatenation of prefix and base but will use both parts. The prefix parameter is copied to generate an ID.

For finer control of the generated identifiers, use raptor_set_generate_id_handler().

If prefix is NULL, the default prefix is used (currently "genid"). If base is less than 1, it is initialised to 1.

rdf_parser :

raptor_parser object

prefix :

prefix string

base :

integer base identifier

raptor_parse_chunk ()

int                 raptor_parse_chunk                  (raptor_parser *rdf_parser,
                                                         unsigned char *buffer,
                                                         size_t len,
                                                         int is_end);

Parse a block of content into triples.

This method can only be called after raptor_start_parse() has initialised the parser.

rdf_parser :

RDF parser

buffer :

content to parse

len :

length of buffer

is_end :

non-0 if this is the end of the content (such as EOF)

Returns :

non-0 on failure.

raptor_parse_file_stream ()

int                 raptor_parse_file_stream            (raptor_parser *rdf_parser,
                                                         FILE *stream,
                                                         const char *filename,
                                                         raptor_uri *base_uri);

Parse RDF content from a FILE*.

After draining the stream, fclose is not called on it internally.

rdf_parser :

parser

stream :

FILE* of RDF content

filename :

filename of content or NULL if it has no name

base_uri :

the base URI to use

Returns :

non 0 on failure

raptor_parse_file ()

int                 raptor_parse_file                   (raptor_parser *rdf_parser,
                                                         raptor_uri *uri,
                                                         raptor_uri *base_uri);

Parse RDF content at a file URI.

If uri is NULL (source is stdin), then the base_uri is required.

rdf_parser :

parser

uri :

URI of RDF content or NULL to read from standard input

base_uri :

the base URI to use (or NULL if the same)

Returns :

non 0 on failure

raptor_parse_uri ()

int                 raptor_parse_uri                    (raptor_parser *rdf_parser,
                                                         raptor_uri *uri,
                                                         raptor_uri *base_uri);

Parse the RDF content at URI.

Sends an HTTP Accept: header when the URI is of the HTTP protocol, see raptor_parse_uri_with_connection() for details including how the base_uri is used.

rdf_parser :

parser

uri :

URI of RDF content

base_uri :

the base URI to use (or NULL if the same)

Returns :

non 0 on failure

raptor_parse_uri_with_connection ()

int                 raptor_parse_uri_with_connection    (raptor_parser *rdf_parser,
                                                         raptor_uri *uri,
                                                         raptor_uri *base_uri,
                                                         void *connection);

Parse RDF content at URI using existing WWW connection.

If base_uri is not given and during resolution of the URI, a protocol redirection occurs, the final resolved URI will be used as the base URI. If redirection does not occur, the base URI will be uri.

If base_uri is given, it overrides the process above.

When connection is NULL and a MIME Type exists for the parser type - such as returned by raptor_get_mime_type(parser) - this type is sent in an HTTP Accept: header in the form Accept: MIME-TYPE along with a wildcard of 0.1 quality, so MIME-TYPE is preferred rather than the sole answer. The latter part may not be necessary but should ensure an HTTP 200 response.

rdf_parser :

parser

uri :

URI of RDF content

base_uri :

the base URI to use (or NULL if the same)

connection :

connection object pointer or NULL to create a new one

Returns :

non 0 on failure

raptor_parse_abort ()

void                raptor_parse_abort                  (raptor_parser *rdf_parser);

Abort an ongoing parse.

Causes any ongoing generation of statements by a parser to be terminated and the parser to return control to the application as soon as any existing buffers have been drained.

Most useful inside raptor_parse_file() or raptor_parse_uri() when the Raptor library is directing the parsing and when one of the callback handlers, such as one set by raptor_set_statement_handler(), needs to return to the main application code.

rdf_parser :

raptor_parser parser object

raptor_get_name ()

const char*         raptor_get_name                     (raptor_parser *rdf_parser);

Get the name of a parser.

rdf_parser :

raptor_parser parser object

Returns :

the short name for the parser.

raptor_get_label ()

const char*         raptor_get_label                    (raptor_parser *rdf_parser);

Get a descriptive label of a parser.

rdf_parser :

raptor_parser parser object

Returns :

a readable label for the parser.

raptor_get_mime_type ()

const char*         raptor_get_mime_type                (raptor_parser *rdf_parser);

Return MIME type for the parser.

rdf_parser :

raptor_parser parser object

Returns :

MIME type or NULL if none available

raptor_get_need_base_uri ()

int                 raptor_get_need_base_uri            (raptor_parser *rdf_parser);

Get a boolean indicating whether this parser needs a base URI to start parsing.

rdf_parser :

raptor_parser parser object

Returns :

non-0 if this parser needs a base URI

raptor_set_feature ()

int                 raptor_set_feature                  (raptor_parser *parser,
                                                         raptor_feature feature,
                                                         int value);

Set various parser features.

The allowed features are available via raptor_features_enumerate().

parser :

raptor_parser parser object

feature :

feature to set from enumerated raptor_feature values

value :

integer feature value (0 or larger)

Returns :

non 0 on failure or if the feature is unknown

raptor_parser_set_feature_string ()

int                 raptor_parser_set_feature_string    (raptor_parser *parser,
                                                         raptor_feature feature,
                                                         unsigned char *value);

Set parser features with string values.

The allowed features are available via raptor_features_enumerate(). If the feature type is integer, the value is interpreted as an integer.

parser :

raptor_parser parser object

feature :

feature to set from enumerated raptor_feature values

value :

feature value

Returns :

non 0 on failure or if the feature is unknown

raptor_get_feature ()

int                 raptor_get_feature                  (raptor_parser *parser,
                                                         raptor_feature feature);

Get various parser features.

The allowed features are available via raptor_features_enumerate().

Note: no feature value is negative

parser :

raptor_parser parser object

feature :

feature to get value

Returns :

feature value or < 0 for an illegal feature

raptor_parser_get_feature_string ()

const unsigned char* raptor_parser_get_feature_string   (raptor_parser *parser,
                                                         raptor_feature feature);

Get parser features with string values.

The allowed features are available via raptor_features_enumerate(). If a string is returned, it must be freed by the caller.

parser :

raptor_parser parser object

feature :

feature to get value

Returns :

feature value or NULL for an illegal feature or no value

raptor_set_parser_strict ()

void                raptor_set_parser_strict            (raptor_parser *rdf_parser,
                                                         int is_strict);

Set parser to strict / lax mode.

rdf_parser :

raptor_parser object

is_strict :

Non 0 for strict parsing

raptor_parser_get_accept_header ()

const char*         raptor_parser_get_accept_header     (raptor_parser *rdf_parser);

Get an HTTP Accept value for the parser.

The returned string must be freed by the caller such as with raptor_free_memory().

rdf_parser :

parser

Returns :

a new Accept: header string or NULL on failure

raptor_parser_set_uri_filter ()

void                raptor_parser_set_uri_filter        (raptor_parser *parser,
                                                         raptor_uri_filter_func filter,
                                                         void *user_data);

Set URI filter function for WWW retrieval.

parser :

parser object

filter :

URI filter function

user_data :

User data to pass to filter function

raptor_parser_generate_id ()

unsigned char*      raptor_parser_generate_id           (raptor_parser *rdf_parser,
                                                         raptor_genid_type type);

Generate an ID for a parser.

Type can be either RAPTOR_GENID_TYPE_BNODEID or RAPTOR_GENID_TYPE_BAGID

rdf_parser :

raptor_parser parser object

type :

Type of ID to generate

Returns :

newly allocated generated ID or NULL on failure

raptor_parser_get_world ()

raptor_world*       raptor_parser_get_world             (raptor_parser *rdf_parser);

Get the raptor_world object associated with a parser.

rdf_parser :

parser

Returns :

raptor_world* pointer