/*
 * Copyright 2009-2010 10gen, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This file contains C implementations of some of the functions
 * needed by the bson module. If possible, these implementations
 * should be used to speed up BSON encoding and decoding.
 */

#include <Python.h>
#include <stdio.h>

#include <datetime.h>

#include "_cbson.h"
#include "buffer.h"
#include "time64.h"
#include "encoding_helpers.h"

static PyObject* Binary = NULL;
static PyObject* Code = NULL;
static PyObject* ObjectId = NULL;
static PyObject* DBRef = NULL;
static PyObject* RECompile = NULL;
static PyObject* UUID = NULL;
static PyObject* Timestamp = NULL;
static PyObject* MinKey = NULL;
static PyObject* MaxKey = NULL;
static PyObject* UTC = NULL;
static PyTypeObject* REType = NULL;

#if PY_VERSION_HEX < 0x02050000
#define WARN(category, message)                 \
    PyErr_Warn((category), (message))
#else
#define WARN(category, message)                 \
    PyErr_WarnEx((category), (message), 1)
#endif

/* Maximum number of regex flags */
#define FLAGS_SIZE 7

#if defined(WIN32) || defined(_MSC_VER)
/* This macro is basically an implementation of asprintf for win32
 * We get the length of the int as string and malloc a buffer for it,
 * returning -1 if that malloc fails. We then actually print to the
 * buffer to get the string value as an int. Like asprintf, the result
 * must be explicitly free'd when done being used.
 */
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#define INT2STRING(buffer, i)                                           \
    *(buffer) = malloc(_scprintf("%d", (i)) + 1),                       \
        (!(buffer) ?                                                    \
         -1 :                                                           \
         _snprintf_s(*(buffer),                                         \
                     _scprintf("%d", (i)) + 1,                          \
                     _scprintf("%d", (i)) + 1,                          \
                     "%d",                                              \
                     (i)))
#define STRCAT(dest, n, src) strcat_s((dest), (n), (src))
#else
#define INT2STRING(buffer, i)                                           \
    *(buffer) = malloc(_scprintf("%d", (i)) + 1),                       \
        (!(buffer) ?                                                    \
         -1 :                                                           \
         _snprintf(*(buffer),                                           \
                     _scprintf("%d", (i)) + 1,                          \
                     "%d",                                              \
                     (i)))
#define STRCAT(dest, n, src) strcat((dest), (src))
#endif
#else
#define INT2STRING(buffer, i) asprintf((buffer), "%d", (i))
#define STRCAT(dest, n, src) strcat((dest), (src))
#endif


static PyObject* elements_to_dict(const char* string, int max,
                                  PyObject* as_class, unsigned char tz_aware);

static int _write_element_to_buffer(buffer_t buffer, int type_byte,
                                    PyObject* value, unsigned char check_keys,
                                    unsigned char uuid_subtype, unsigned char first_attempt);

/* Date stuff */
static PyObject* datetime_from_millis(long long millis) {
    int microseconds = (millis % 1000) * 1000;
    Time64_T seconds = millis / 1000;
    struct TM timeinfo;
    gmtime64_r(&seconds, &timeinfo);

    return PyDateTime_FromDateAndTime(timeinfo.tm_year + 1900,
                                      timeinfo.tm_mon + 1,
                                      timeinfo.tm_mday,
                                      timeinfo.tm_hour,
                                      timeinfo.tm_min,
                                      timeinfo.tm_sec,
                                      microseconds);
}

static long long millis_from_datetime(PyObject* datetime) {
    struct TM timeinfo;
    long long millis;

    timeinfo.tm_year = PyDateTime_GET_YEAR(datetime) - 1900;
    timeinfo.tm_mon = PyDateTime_GET_MONTH(datetime) - 1;
    timeinfo.tm_mday = PyDateTime_GET_DAY(datetime);
    timeinfo.tm_hour = PyDateTime_DATE_GET_HOUR(datetime);
    timeinfo.tm_min = PyDateTime_DATE_GET_MINUTE(datetime);
    timeinfo.tm_sec = PyDateTime_DATE_GET_SECOND(datetime);

    millis = timegm64(&timeinfo) * 1000;
    millis += PyDateTime_DATE_GET_MICROSECOND(datetime) / 1000;
    return millis;
}

/* Just make this compatible w/ the old API. */
int buffer_write_bytes(buffer_t buffer, const char* data, int size) {
    if (buffer_write(buffer, data, size)) {
        PyErr_NoMemory();
        return 0;
    }
    return 1;
}

/* returns 0 on failure */
static int write_string(buffer_t buffer, PyObject* py_string) {
    Py_ssize_t string_length;
    const char* string = PyString_AsString(py_string);
    if (!string) {
        return 1;
    }
    string_length = PyString_Size(py_string) + 1;

    if (!buffer_write_bytes(buffer, (const char*)&string_length, 4)) {
        return 0;
    }
    if (!buffer_write_bytes(buffer, string, string_length)) {
        return 0;
    }
    return 1;
}

/* Get an error class from the bson.errors module.
 *
 * Returns a new ref */
static PyObject* _error(char* name) {
    PyObject* error;
    PyObject* errors = PyImport_ImportModule("bson.errors");
    if (!errors) {
        return NULL;
    }
    error = PyObject_GetAttrString(errors, name);
    Py_DECREF(errors);
    return error;
}

/* Reload a cached Python object.
 *
 * Returns non-zero on failure. */
static int _reload_object(PyObject** object, char* module_name, char* object_name) {
    PyObject* module;

    module = PyImport_ImportModule(module_name);
    if (!module) {
        return 1;
    }

    *object = PyObject_GetAttrString(module, object_name);
    Py_DECREF(module);

    return (*object) ? 0 : 2;
}

/* Reload all cached Python objects.
 *
 * Returns non-zero on failure. */
static int _reload_python_objects(void) {
    if (_reload_object(&Binary, "bson.binary", "Binary") ||
        _reload_object(&Code, "bson.code", "Code") ||
        _reload_object(&ObjectId, "bson.objectid", "ObjectId") ||
        _reload_object(&DBRef, "bson.dbref", "DBRef") ||
        _reload_object(&Timestamp, "bson.timestamp", "Timestamp") ||
        _reload_object(&MinKey, "bson.min_key", "MinKey") ||
        _reload_object(&MaxKey, "bson.max_key", "MaxKey") ||
        _reload_object(&UTC, "bson.tz_util", "utc") ||
        _reload_object(&RECompile, "re", "compile")) {
        return 1;
    }
    /* If we couldn't import uuid then we must be on 2.4. Just ignore. */
    if (_reload_object(&UUID, "uuid", "UUID") == 1) {
        UUID = NULL;
        PyErr_Clear();
    }
    /* Reload our REType hack too. */
    REType = PyObject_CallFunction(RECompile, "O",
                                   PyString_FromString(""))->ob_type;
    return 0;
}

static int write_element_to_buffer(buffer_t buffer, int type_byte,
                                   PyObject* value, unsigned char check_keys,
                                   unsigned char uuid_subtype,
                                   unsigned char first_attempt) {
    int result;
    if(Py_EnterRecursiveCall(" while encoding an object to BSON "))
        return 0;
    result = _write_element_to_buffer(buffer, type_byte, value,
                                      check_keys, uuid_subtype, first_attempt);
    Py_LeaveRecursiveCall();
    return result;
}

/* TODO our platform better be little-endian w/ 4-byte ints! */
/* Write a single value to the buffer (also write it's type_byte, for which
 * space has already been reserved.
 *
 * returns 0 on failure */
static int _write_element_to_buffer(buffer_t buffer, int type_byte,
                                    PyObject* value, unsigned char check_keys,
                                    unsigned char uuid_subtype, unsigned char first_attempt) {
    if (PyBool_Check(value)) {
        const long bool = PyInt_AsLong(value);
        const char c = bool ? 0x01 : 0x00;
        *(buffer_get_buffer(buffer) + type_byte) = 0x08;
        return buffer_write_bytes(buffer, &c, 1);
    }
    else if (PyInt_Check(value)) {
        const long long_value = PyInt_AsLong(value);
        const int int_value = (int)long_value;
        if (PyErr_Occurred() || long_value != int_value) { /* Overflow */
            long long long_long_value;
            PyErr_Clear();
            long_long_value = PyLong_AsLongLong(value);
            if (PyErr_Occurred()) { /* Overflow AGAIN */
                PyErr_SetString(PyExc_OverflowError,
                                "MongoDB can only handle up to 8-byte ints");
                return 0;
            }
            *(buffer_get_buffer(buffer) + type_byte) = 0x12;
            return buffer_write_bytes(buffer, (const char*)&long_long_value, 8);
        }
        *(buffer_get_buffer(buffer) + type_byte) = 0x10;
        return buffer_write_bytes(buffer, (const char*)&int_value, 4);
    } else if (PyLong_Check(value)) {
        const long long long_long_value = PyLong_AsLongLong(value);
        if (PyErr_Occurred()) { /* Overflow */
            PyErr_SetString(PyExc_OverflowError,
                            "MongoDB can only handle up to 8-byte ints");
            return 0;
        }
        *(buffer_get_buffer(buffer) + type_byte) = 0x12;
        return buffer_write_bytes(buffer, (const char*)&long_long_value, 8);
    } else if (PyFloat_Check(value)) {
        const double d = PyFloat_AsDouble(value);
        *(buffer_get_buffer(buffer) + type_byte) = 0x01;
        return buffer_write_bytes(buffer, (const char*)&d, 8);
    } else if (value == Py_None) {
        *(buffer_get_buffer(buffer) + type_byte) = 0x0A;
        return 1;
    } else if (PyDict_Check(value)) {
        *(buffer_get_buffer(buffer) + type_byte) = 0x03;
        return write_dict(buffer, value, check_keys, uuid_subtype, 0);
    } else if (PyList_Check(value) || PyTuple_Check(value)) {
        int start_position,
            length_location,
            items,
            length,
            i;
        char zero = 0;

        *(buffer_get_buffer(buffer) + type_byte) = 0x04;
        start_position = buffer_get_position(buffer);

        /* save space for length */
        length_location = buffer_save_space(buffer, 4);
        if (length_location == -1) {
            PyErr_NoMemory();
            return 0;
        }

        items = PySequence_Size(value);
        for(i = 0; i < items; i++) {
            int list_type_byte = buffer_save_space(buffer, 1);
            char* name;
            PyObject* item_value;

            if (list_type_byte == -1) {
                PyErr_NoMemory();
                return 0;
            }
            if (INT2STRING(&name, i) < 0 || !name) {
                PyErr_NoMemory();
                return 0;
            }
            if (!buffer_write_bytes(buffer, name, strlen(name) + 1)) {
                free(name);
                return 0;
            }
            free(name);

            item_value = PySequence_GetItem(value, i);
            if (!write_element_to_buffer(buffer, list_type_byte,
                                         item_value, check_keys, uuid_subtype, 1)) {
                Py_DECREF(item_value);
                return 0;
            }
            Py_DECREF(item_value);
        }

        /* write null byte and fill in length */
        if (!buffer_write_bytes(buffer, &zero, 1)) {
            return 0;
        }
        length = buffer_get_position(buffer) - start_position;
        memcpy(buffer_get_buffer(buffer) + length_location, &length, 4);
        return 1;
    } else if (PyObject_IsInstance(value, Binary)) {
        PyObject* subtype_object;

        *(buffer_get_buffer(buffer) + type_byte) = 0x05;
        subtype_object = PyObject_GetAttrString(value, "subtype");
        if (!subtype_object) {
            return 0;
        }
        {
            const long long_subtype = PyInt_AsLong(subtype_object);
            const char subtype = (const char)long_subtype;
            const int length = PyString_Size(value);

            Py_DECREF(subtype_object);
            if (subtype == 2) {
                const int other_length = length + 4;
                if (!buffer_write_bytes(buffer, (const char*)&other_length, 4)) {
                    return 0;
                }
                if (!buffer_write_bytes(buffer, &subtype, 1)) {
                    return 0;
                }
            }
            if (!buffer_write_bytes(buffer, (const char*)&length, 4)) {
                return 0;
            }
            if (subtype != 2) {
                if (!buffer_write_bytes(buffer, &subtype, 1)) {
                    return 0;
                }
            }
            {
                const char* string = PyString_AsString(value);
                if (!string) {
                    return 0;
                }
                if (!buffer_write_bytes(buffer, string, length)) {
                    return 0;
                }
            }
        }
        return 1;
    } else if (UUID && PyObject_IsInstance(value, UUID)) {
        // Just a special case of Binary above, but simpler to do as a separate case

        // UUID is always 16 bytes, subtype 3
        int length = 16;
        const char subtype = (const char)uuid_subtype;

        PyObject* bytes;

        *(buffer_get_buffer(buffer) + type_byte) = 0x05;
        if (!buffer_write_bytes(buffer, (const char*)&length, 4)) {
            return 0;
        }
        if (!buffer_write_bytes(buffer, &subtype, 1)) {
            return 0;
        }

        bytes = PyObject_GetAttrString(value, "bytes");
        if (!bytes) {
            return 0;
        }
        if (!buffer_write_bytes(buffer, PyString_AsString(bytes), length)) {
            Py_DECREF(bytes);
            return 0;
        }
        Py_DECREF(bytes);
        return 1;
    } else if (PyObject_IsInstance(value, Code)) {
        int start_position,
            length_location,
            length;
        PyObject* scope;

        *(buffer_get_buffer(buffer) + type_byte) = 0x0F;

        start_position = buffer_get_position(buffer);
        /* save space for length */
        length_location = buffer_save_space(buffer, 4);
        if (length_location == -1) {
            PyErr_NoMemory();
            return 0;
        }

        if (!write_string(buffer, value)) {
            return 0;
        }

        scope = PyObject_GetAttrString(value, "scope");
        if (!scope) {
            return 0;
        }
        if (!write_dict(buffer, scope, 0, uuid_subtype, 0)) {
            Py_DECREF(scope);
            return 0;
        }
        Py_DECREF(scope);

        length = buffer_get_position(buffer) - start_position;
        memcpy(buffer_get_buffer(buffer) + length_location, &length, 4);
        return 1;
    } else if (PyString_Check(value)) {
        int result;
        result_t status;

        *(buffer_get_buffer(buffer) + type_byte) = 0x02;
        status = check_string((const unsigned char*)PyString_AsString(value),
                              PyString_Size(value), 1, 0);
        if (status == NOT_UTF_8) {
            PyObject* InvalidStringData = _error("InvalidStringData");
            PyErr_SetString(InvalidStringData,
                            "strings in documents must be valid UTF-8");
            Py_DECREF(InvalidStringData);
            return 0;
        }
        result = write_string(buffer, value);
        return result;
    } else if (PyUnicode_Check(value)) {
        PyObject* encoded;
        int result;

        *(buffer_get_buffer(buffer) + type_byte) = 0x02;
        encoded = PyUnicode_AsUTF8String(value);
        if (!encoded) {
            return 0;
        }
        result = write_string(buffer, encoded);
        Py_DECREF(encoded);
        return result;
    } else if (PyDateTime_Check(value)) {
        long long millis;
        PyObject* utcoffset = PyObject_CallMethod(value, "utcoffset", NULL);
        if (utcoffset != Py_None) {
            PyObject* result = PyNumber_Subtract(value, utcoffset);
            Py_DECREF(utcoffset);
            if (!result) {
                return 0;
            }
            millis = millis_from_datetime(result);
            Py_DECREF(result);
        } else {
            millis = millis_from_datetime(value);
        }
        *(buffer_get_buffer(buffer) + type_byte) = 0x09;
        return buffer_write_bytes(buffer, (const char*)&millis, 8);
    } else if (PyObject_IsInstance(value, ObjectId)) {
        PyObject* pystring = PyObject_GetAttrString(value, "_ObjectId__id");
        if (!pystring) {
            return 0;
        }
        {
            const char* as_string = PyString_AsString(pystring);
            if (!as_string) {
                Py_DECREF(pystring);
                return 0;
            }
            if (!buffer_write_bytes(buffer, as_string, 12)) {
                Py_DECREF(pystring);
                return 0;
            }
            Py_DECREF(pystring);
            *(buffer_get_buffer(buffer) + type_byte) = 0x07;
        }
        return 1;
    } else if (PyObject_IsInstance(value, DBRef)) {
        PyObject* as_doc = PyObject_CallMethod(value, "as_doc", NULL);
        if (!as_doc) {
            return 0;
        }
        if (!write_dict(buffer, as_doc, 0, uuid_subtype, 0)) {
            Py_DECREF(as_doc);
            return 0;
        }
        Py_DECREF(as_doc);
        *(buffer_get_buffer(buffer) + type_byte) = 0x03;
        return 1;
    } else if (PyObject_IsInstance(value, Timestamp)) {
        PyObject* obj;
        long i;

        obj = PyObject_GetAttrString(value, "inc");
        if (!obj) {
            return 0;
        }
        i = PyInt_AsLong(obj);
        Py_DECREF(obj);
        if (!buffer_write_bytes(buffer, (const char*)&i, 4)) {
            return 0;
        }

        obj = PyObject_GetAttrString(value, "time");
        if (!obj) {
            return 0;
        }
        i = PyInt_AsLong(obj);
        Py_DECREF(obj);
        if (!buffer_write_bytes(buffer, (const char*)&i, 4)) {
            return 0;
        }

        *(buffer_get_buffer(buffer) + type_byte) = 0x11;
        return 1;
    }
    else if (PyObject_TypeCheck(value, REType)) {
        PyObject* py_flags = PyObject_GetAttrString(value, "flags");
        PyObject* py_pattern;
        PyObject* encoded_pattern;
        long int_flags;
        char flags[FLAGS_SIZE];
        char check_utf8 = 0;
        int pattern_length,
            flags_length;
        result_t status;

        if (!py_flags) {
            return 0;
        }
        int_flags = PyInt_AsLong(py_flags);
        Py_DECREF(py_flags);
        py_pattern = PyObject_GetAttrString(value, "pattern");
        if (!py_pattern) {
            return 0;
        }

        if (PyUnicode_Check(py_pattern)) {
            encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
            Py_DECREF(py_pattern);
            if (!encoded_pattern) {
                return 0;
            }
        } else {
            encoded_pattern = py_pattern;
            check_utf8 = 1;
        }

        status = check_string((const unsigned char*)PyString_AsString(encoded_pattern),
                              PyString_Size(encoded_pattern), check_utf8, 1);
        if (status == NOT_UTF_8) {
            PyObject* InvalidStringData = _error("InvalidStringData");
            PyErr_SetString(InvalidStringData,
                            "regex patterns must be valid UTF-8");
            Py_DECREF(InvalidStringData);
            return 0;
        } else if (status == HAS_NULL) {
            PyObject* InvalidDocument = _error("InvalidDocument");
            PyErr_SetString(InvalidDocument,
                            "regex patterns must not contain the NULL byte");
            Py_DECREF(InvalidDocument);
            return 0;
        }

        {
            const char* pattern =  PyString_AsString(encoded_pattern);
            pattern_length = strlen(pattern) + 1;

            if (!buffer_write_bytes(buffer, pattern, pattern_length)) {
                Py_DECREF(encoded_pattern);
                return 0;
            }
        }
        Py_DECREF(encoded_pattern);

        flags[0] = 0;
        /* TODO don't hardcode these */
        if (int_flags & 2) {
            STRCAT(flags, FLAGS_SIZE, "i");
        }
        if (int_flags & 4) {
            STRCAT(flags, FLAGS_SIZE, "l");
        }
        if (int_flags & 8) {
            STRCAT(flags, FLAGS_SIZE, "m");
        }
        if (int_flags & 16) {
            STRCAT(flags, FLAGS_SIZE, "s");
        }
        if (int_flags & 32) {
            STRCAT(flags, FLAGS_SIZE, "u");
        }
        if (int_flags & 64) {
            STRCAT(flags, FLAGS_SIZE, "x");
        }
        flags_length = strlen(flags) + 1;
        if (!buffer_write_bytes(buffer, flags, flags_length)) {
            return 0;
        }
        *(buffer_get_buffer(buffer) + type_byte) = 0x0B;
        return 1;
    } else if (PyObject_IsInstance(value, MinKey)) {
        *(buffer_get_buffer(buffer) + type_byte) = 0xFF;
        return 1;
    } else if (PyObject_IsInstance(value, MaxKey)) {
        *(buffer_get_buffer(buffer) + type_byte) = 0x7F;
        return 1;
    } else if (first_attempt) {
        /* Try reloading the modules and having one more go at it. */
        if (WARN(PyExc_RuntimeWarning, "couldn't encode - reloading python "
                 "modules and trying again. if you see this without getting "
                 "an InvalidDocument exception please see http://api.mongodb"
                 ".org/python/current/faq.html#does-pymongo-work-with-mod-"
                 "wsgi") == -1) {
            return 0;
        }
        if (_reload_python_objects()) {
            return 0;
        }
        return write_element_to_buffer(buffer, type_byte, value, check_keys, uuid_subtype, 0);
    }
    {
        PyObject* errmsg = PyString_FromString("Cannot encode object: ");
        PyObject* repr = PyObject_Repr(value);
        PyObject* InvalidDocument = _error("InvalidDocument");
        PyString_ConcatAndDel(&errmsg, repr);
        PyErr_SetString(InvalidDocument, PyString_AsString(errmsg));
        Py_DECREF(errmsg);
        Py_DECREF(InvalidDocument);
        return 0;
    }
}

static int check_key_name(const char* name,
                          const Py_ssize_t name_length) {
    int i;
    if (name_length > 0 && name[0] == '$') {
        PyObject* InvalidDocument = _error("InvalidDocument");
        PyObject* errmsg = PyString_FromFormat("key '%s' must not start with '$'", name);
        PyErr_SetString(InvalidDocument, PyString_AsString(errmsg));
        Py_DECREF(errmsg);
        Py_DECREF(InvalidDocument);
        return 0;
    }
    for (i = 0; i < name_length; i++) {
        if (name[i] == '.') {
            PyObject* InvalidDocument = _error("InvalidDocument");
            PyObject* errmsg = PyString_FromFormat("key '%s' must not contain '.'", name);
            PyErr_SetString(InvalidDocument, PyString_AsString(errmsg));
            Py_DECREF(errmsg);
            Py_DECREF(InvalidDocument);
            return 0;
        }
    }
    return 1;
}

/* Write a (key, value) pair to the buffer.
 *
 * Returns 0 on failure */
int write_pair(buffer_t buffer, const char* name, Py_ssize_t name_length,
               PyObject* value, unsigned char check_keys,
               unsigned char uuid_subtype, unsigned char allow_id) {
    int type_byte;

    /* Don't write any _id elements unless we're explicitly told to -
     * _id has to be written first so we do so, but don't bother
     * deleting it from the dictionary being written. */
    if (!allow_id && strcmp(name, "_id") == 0) {
        return 1;
    }

    type_byte = buffer_save_space(buffer, 1);
    if (type_byte == -1) {
        PyErr_NoMemory();
        return 0;
    }
    if (check_keys && !check_key_name(name, name_length)) {
        return 0;
    }
    if (!buffer_write_bytes(buffer, name, name_length + 1)) {
        return 0;
    }
    if (!write_element_to_buffer(buffer, type_byte, value,
                                 check_keys, uuid_subtype, 1)) {
        return 0;
    }
    return 1;
}

int decode_and_write_pair(buffer_t buffer,
                          PyObject* key, PyObject* value,
                          unsigned char check_keys,
                          unsigned char uuid_subtype, unsigned char top_level) {
    PyObject* encoded;
    if (PyUnicode_Check(key)) {
        result_t status;
        encoded = PyUnicode_AsUTF8String(key);
        if (!encoded) {
            return 0;
        }
        status = check_string((const unsigned char*)PyString_AsString(encoded),
                              PyString_Size(encoded), 0, 1);

        if (status == HAS_NULL) {
            PyObject* InvalidDocument = _error("InvalidDocument");
            PyErr_SetString(InvalidDocument,
                            "Key names must not contain the NULL byte");
            Py_DECREF(InvalidDocument);
            return 0;
        }
    } else if (PyString_Check(key)) {
        result_t status;
        encoded = key;
        Py_INCREF(encoded);

        status = check_string((const unsigned char*)PyString_AsString(encoded),
                                       PyString_Size(encoded), 1, 1);

        if (status == NOT_UTF_8) {
            PyObject* InvalidStringData = _error("InvalidStringData");
            PyErr_SetString(InvalidStringData,
                            "strings in documents must be valid UTF-8");
            Py_DECREF(InvalidStringData);
            return 0;
        } else if (status == HAS_NULL) {
            PyObject* InvalidDocument = _error("InvalidDocument");
            PyErr_SetString(InvalidDocument,
                            "Key names must not contain the NULL byte");
            Py_DECREF(InvalidDocument);
            return 0;
        }
    } else {
        PyObject* InvalidDocument = _error("InvalidDocument");
        PyObject* errmsg = PyString_FromString("documents must have only string keys, key was ");
        PyObject* repr = PyObject_Repr(key);
        PyString_ConcatAndDel(&errmsg, repr);
        PyErr_SetString(InvalidDocument, PyString_AsString(errmsg));
        Py_DECREF(InvalidDocument);
        Py_DECREF(errmsg);
        return 0;
    }

    /* If top_level is True, don't allow writing _id here - it was already written. */
    if (!write_pair(buffer, PyString_AsString(encoded),
                    PyString_Size(encoded), value,
                    check_keys, uuid_subtype, !top_level)) {
        Py_DECREF(encoded);
        return 0;
    }

    Py_DECREF(encoded);
    return 1;
}

/* returns 0 on failure */
int write_dict(buffer_t buffer, PyObject* dict,
               unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) {
    PyObject* key;
    PyObject* iter;
    char zero = 0;
    int length;
    int length_location;

    if (!PyDict_Check(dict)) {
        PyObject* errmsg = PyString_FromString("encoder expected a mapping type but got: ");
        PyObject* repr = PyObject_Repr(dict);
        PyString_ConcatAndDel(&errmsg, repr);
        PyErr_SetString(PyExc_TypeError, PyString_AsString(errmsg));
        Py_DECREF(errmsg);
        return 0;
    }

    length_location = buffer_save_space(buffer, 4);
    if (length_location == -1) {
        PyErr_NoMemory();
        return 0;
    }

    /* Write _id first if this is a top level doc. */
    if (top_level) {
        PyObject* _id = PyDict_GetItemString(dict, "_id");
        if (_id) {
            /* Don't bother checking keys, but do make sure we're allowed to
             * write _id */
            if (!write_pair(buffer, "_id", 3, _id, 0, uuid_subtype, 1)) {
                return 0;
            }
        }
    }

    iter = PyObject_GetIter(dict);
    if (iter == NULL) {
        return 0;
    }
    while ((key = PyIter_Next(iter)) != NULL) {
        PyObject* value = PyDict_GetItem(dict, key);
        if (!value) {
            PyErr_SetObject(PyExc_KeyError, key);
            Py_DECREF(key);
            Py_DECREF(iter);
            return 0;
        }
        if (!decode_and_write_pair(buffer, key, value,
                                   check_keys, uuid_subtype, top_level)) {
            Py_DECREF(key);
            Py_DECREF(iter);
            return 0;
        }
        Py_DECREF(key);
    }
    Py_DECREF(iter);

    /* write null byte and fill in length */
    if (!buffer_write_bytes(buffer, &zero, 1)) {
        return 0;
    }
    length = buffer_get_position(buffer) - length_location;
    memcpy(buffer_get_buffer(buffer) + length_location, &length, 4);
    return 1;
}

static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
    PyObject* dict;
    PyObject* result;
    unsigned char check_keys;
    unsigned char uuid_subtype;
    buffer_t buffer;

    if (!PyArg_ParseTuple(args, "Obb", &dict, &check_keys, &uuid_subtype)) {
        return NULL;
    }

    buffer = buffer_new();
    if (!buffer) {
        PyErr_NoMemory();
        return NULL;
    }

    if (!write_dict(buffer, dict, check_keys, uuid_subtype, 1)) {
        buffer_free(buffer);
        return NULL;
    }

    /* objectify buffer */
    result = Py_BuildValue("s#", buffer_get_buffer(buffer),
                           buffer_get_position(buffer));
    buffer_free(buffer);
    return result;
}

static PyObject* get_value(const char* buffer, int* position, int type,
                           int max, PyObject* as_class, unsigned char tz_aware) {
    PyObject* value;
    PyObject* error;
    switch (type) {
    case 1:
        {
            double d;
            if (max < 8) {
                goto invalid;
            }
            memcpy(&d, buffer + *position, 8);
            value = PyFloat_FromDouble(d);
            if (!value) {
                return NULL;
            }
            *position += 8;
            break;
        }
    case 2:
    case 13:
    case 14:
        {
            int value_length = ((int*)(buffer + *position))[0] - 1;
            if (max < value_length) {
                goto invalid;
            }
            *position += 4;
            value = PyUnicode_DecodeUTF8(buffer + *position, value_length, "strict");
            if (!value) {
                return NULL;
            }
            *position += value_length + 1;
            break;
        }
    case 3:
        {
            int size;
            memcpy(&size, buffer + *position, 4);
            if (max < size) {
                goto invalid;
            }
            value = elements_to_dict(buffer + *position + 4, size - 5, as_class, tz_aware);
            if (!value) {
                return NULL;
            }

            /* Decoding for DBRefs */
            if (strcmp(buffer + *position + 5, "$ref") == 0) { /* DBRef */
                PyObject* dbref;
                PyObject* collection = PyDict_GetItemString(value, "$ref");
                PyObject* id = PyDict_GetItemString(value, "$id");
                PyObject* database = PyDict_GetItemString(value, "$db");

                Py_INCREF(collection);
                PyDict_DelItemString(value, "$ref");
                Py_INCREF(id);
                PyDict_DelItemString(value, "$id");

                if (database == NULL) {
                    database = Py_None;
                    Py_INCREF(database);
                } else {
                    Py_INCREF(database);
                    PyDict_DelItemString(value, "$db");
                }

                dbref = PyObject_CallFunctionObjArgs(DBRef, collection, id, database, value, NULL);
                Py_DECREF(value);
                value = dbref;

                Py_DECREF(id);
                Py_DECREF(collection);
                Py_DECREF(database);
                if (!value) {
                    return NULL;
                }
            }

            *position += size;
            break;
        }
    case 4:
        {
            int size,
                end;

            memcpy(&size, buffer + *position, 4);
            if (max < size) {
                goto invalid;
            }
            end = *position + size - 1;
            *position += 4;

            value = PyList_New(0);
            if (!value) {
                return NULL;
            }
            while (*position < end) {
                PyObject* to_append;

                int type = (int)buffer[(*position)++];
                int key_size = strlen(buffer + *position);
                *position += key_size + 1; /* just skip the key, they're in order. */
                to_append = get_value(buffer, position, type, max - key_size, as_class, tz_aware);
                if (!to_append) {
                    return NULL;
                }
                PyList_Append(value, to_append);
                Py_DECREF(to_append);
            }
            (*position)++;
            break;
        }
    case 5:
        {
            PyObject* data;
            PyObject* st;
            int length,
                subtype;

            memcpy(&length, buffer + *position, 4);
            if (max < length) {
                goto invalid;
            }
            subtype = (unsigned char)buffer[*position + 4];

            if (subtype == 2) {
                data = PyString_FromStringAndSize(buffer + *position + 9, length - 4);
            } else {
                data = PyString_FromStringAndSize(buffer + *position + 5, length);
            }
            if (!data) {
                return NULL;
            }

            if ((subtype == 3 || subtype == 4) && UUID) { // Encode as UUID, not Binary
                PyObject* kwargs;
                PyObject* args = PyTuple_New(0);
                if (!args) {
                    return NULL;
                }
                kwargs = PyDict_New();
                if (!kwargs) {
                    Py_DECREF(args);
                    return NULL;
                }

                assert(length == 16); // UUID should always be 16 bytes

                PyDict_SetItemString(kwargs, "bytes", data);
                value = PyObject_Call(UUID, args, kwargs);

                Py_DECREF(args);
                Py_DECREF(kwargs);
                Py_DECREF(data);
                if (!value) {
                    return NULL;
                }

                *position += length + 5;
                break;
            }

            st = PyInt_FromLong(subtype);
            if (!st) {
                Py_DECREF(data);
                return NULL;
            }
            value = PyObject_CallFunctionObjArgs(Binary, data, st, NULL);
            Py_DECREF(st);
            Py_DECREF(data);
            if (!value) {
                return NULL;
            }
            *position += length + 5;
            break;
        }
    case 6:
    case 10:
        {
            value = Py_None;
            Py_INCREF(value);
            break;
        }
    case 7:
        {
            if (max < 12) {
                goto invalid;
            }
            value = PyObject_CallFunction(ObjectId, "s#", buffer + *position, 12);
            if (!value) {
                return NULL;
            }
            *position += 12;
            break;
        }
    case 8:
        {
            value = buffer[(*position)++] ? Py_True : Py_False;
            Py_INCREF(value);
            break;
        }
    case 9:
        {
            PyObject* naive;
            PyObject* replace;
            PyObject* args;
            PyObject* kwargs;
            if (max < 8) {
                goto invalid;
            }
            naive = datetime_from_millis(*(long long*)(buffer + *position));
            *position += 8;
            if (!tz_aware) { /* In the naive case, we're done here. */
                value = naive;
                break;
            }

            if (!naive) {
                return NULL;
            }
            replace = PyObject_GetAttrString(naive, "replace");
            Py_DECREF(naive);
            if (!replace) {
                return NULL;
            }
            args = PyTuple_New(0);
            if (!args) {
                Py_DECREF(replace);
                return NULL;
            }
            kwargs = PyDict_New();
            if (!kwargs) {
                Py_DECREF(replace);
                Py_DECREF(args);
                return NULL;
            }
            if (PyDict_SetItemString(kwargs, "tzinfo", UTC) == -1) {
                Py_DECREF(replace);
                Py_DECREF(args);
                Py_DECREF(kwargs);
                return NULL;
            }
            value = PyObject_Call(replace, args, kwargs);
            Py_DECREF(replace);
            Py_DECREF(args);
            Py_DECREF(kwargs);
            break;
        }
    case 11:
        {
            PyObject* pattern;
            int flags_length,
                flags,
                i;
            int pattern_length = strlen(buffer + *position);
            if (max < pattern_length) {
                goto invalid;
            }
            pattern = PyUnicode_DecodeUTF8(buffer + *position, pattern_length, "strict");
            if (!pattern) {
                return NULL;
            }
            *position += pattern_length + 1;
            flags_length = strlen(buffer + *position);
            if (max < pattern_length + flags_length) {
                goto invalid;
            }
            flags = 0;
            for (i = 0; i < flags_length; i++) {
                if (buffer[*position + i] == 'i') {
                    flags |= 2;
                } else if (buffer[*position + i] == 'l') {
                    flags |= 4;
                } else if (buffer[*position + i] == 'm') {
                    flags |= 8;
                } else if (buffer[*position + i] == 's') {
                    flags |= 16;
                } else if (buffer[*position + i] == 'u') {
                    flags |= 32;
                } else if (buffer[*position + i] == 'x') {
                    flags |= 64;
                }
            }
            *position += flags_length + 1;
            value = PyObject_CallFunction(RECompile, "Oi", pattern, flags);
            Py_DECREF(pattern);
            break;
        }
    case 12:
        {
            int collection_length;
            PyObject* collection;
            PyObject* id;

            *position += 4;
            collection_length = strlen(buffer + *position);
            if (max < collection_length) {
                goto invalid;
            }
            collection = PyUnicode_DecodeUTF8(buffer + *position, collection_length, "strict");
            if (!collection) {
                return NULL;
            }
            *position += collection_length + 1;
            if (max < collection_length + 12) {
                goto invalid;
            }
            id = PyObject_CallFunction(ObjectId, "s#", buffer + *position, 12);
            if (!id) {
                Py_DECREF(collection);
                return NULL;
            }
            *position += 12;
            value = PyObject_CallFunctionObjArgs(DBRef, collection, id, NULL);
            Py_DECREF(collection);
            Py_DECREF(id);
            break;
        }
    case 15:
        {
            int code_length,
                scope_size;
            PyObject* code;
            PyObject* scope;

            *position += 8;
            code_length = strlen(buffer + *position);
            if (max < 8 + code_length) {
                goto invalid;
            }
            code = PyUnicode_DecodeUTF8(buffer + *position, code_length, "strict");
            if (!code) {
                return NULL;
            }
            *position += code_length + 1;

            memcpy(&scope_size, buffer + *position, 4);
            scope = elements_to_dict(buffer + *position + 4, scope_size - 5,
                                     (PyObject*)&PyDict_Type, tz_aware);
            if (!scope) {
                Py_DECREF(code);
                return NULL;
            }
            *position += scope_size;

            value = PyObject_CallFunctionObjArgs(Code, code, scope, NULL);
            Py_DECREF(code);
            Py_DECREF(scope);
            break;
        }
    case 16:
        {
            int i;
            if (max < 4) {
                goto invalid;
            }
            memcpy(&i, buffer + *position, 4);
            value = PyInt_FromLong(i);
            if (!value) {
                return NULL;
            }
            *position += 4;
            break;
        }
    case 17:
        {
            unsigned int time, inc;
            if (max < 8) {
                goto invalid;
            }
            memcpy(&inc, buffer + *position, 4);
            memcpy(&time, buffer + *position + 4, 4);
            value = PyObject_CallFunction(Timestamp, "II", time, inc);
            if (!value) {
                return NULL;
            }
            *position += 8;
            break;
        }
    case 18:
        {
            long long ll;
            if (max < 8) {
                goto invalid;
            }
            memcpy(&ll, buffer + *position, 8);
            value = PyLong_FromLongLong(ll);
            if (!value) {
                return NULL;
            }
            *position += 8;
            break;
        }
    case -1:
        {
            value = PyObject_CallFunctionObjArgs(MinKey, NULL);
            break;
        }
    case 127:
        {
            value = PyObject_CallFunctionObjArgs(MaxKey, NULL);
            break;
        }
    default:
        {
            PyObject* InvalidDocument = _error("InvalidDocument");
            PyErr_SetString(InvalidDocument, "no c decoder for this type yet");
            Py_DECREF(InvalidDocument);
            return NULL;
        }
    }
    return value;

    invalid:

    error = _error("InvalidBSON");
    PyErr_SetNone(error);
    Py_DECREF(error);
    return NULL;
}

static PyObject* elements_to_dict(const char* string, int max,
                                  PyObject* as_class, unsigned char tz_aware) {
    int position = 0;
    PyObject* dict = PyObject_CallObject(as_class, NULL);
    if (!dict) {
        return NULL;
    }
    while (position < max) {
        PyObject* name;
        PyObject* value;
        int type = (int)string[position++];
        int name_length = strlen(string + position);
        if (position + name_length >= max) {
            PyObject* InvalidBSON = _error("InvalidBSON");
            PyErr_SetNone(InvalidBSON);
            Py_DECREF(InvalidBSON);
            return NULL;
        }
        name = PyUnicode_DecodeUTF8(string + position, name_length, "strict");
        if (!name) {
            return NULL;
        }
        position += name_length + 1;
        value = get_value(string, &position, type, max - position, as_class, tz_aware);
        if (!value) {
            return NULL;
        }

        PyObject_SetItem(dict, name, value);
        Py_DECREF(name);
        Py_DECREF(value);
    }
    return dict;
}

static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
    unsigned int size;
    Py_ssize_t total_size;
    const char* string;
    PyObject* bson;
    PyObject* as_class;
    unsigned char tz_aware;
    PyObject* dict;
    PyObject* remainder;
    PyObject* result;

    if (!PyArg_ParseTuple(args, "OOb", &bson, &as_class, &tz_aware)) {
        return NULL;
    }

    if (!PyString_Check(bson)) {
        PyErr_SetString(PyExc_TypeError, "argument to _bson_to_dict must be a string");
        return NULL;
    }
    total_size = PyString_Size(bson);
    if (total_size < 5) {
        PyObject* InvalidBSON = _error("InvalidBSON");
        PyErr_SetString(InvalidBSON,
                        "not enough data for a BSON document");
        Py_DECREF(InvalidBSON);
        return NULL;
    }

    string = PyString_AsString(bson);
    if (!string) {
        return NULL;
    }
    memcpy(&size, string, 4);

    if (total_size < size) {
        PyObject* InvalidBSON = _error("InvalidBSON");
        PyErr_SetString(InvalidBSON,
                        "objsize too large");
        Py_DECREF(InvalidBSON);
        return NULL;
    }

    if (string[size - 1]) {
        PyObject* InvalidBSON = _error("InvalidBSON");
        PyErr_SetString(InvalidBSON,
                        "bad eoo");
        Py_DECREF(InvalidBSON);
        return NULL;
    }

    dict = elements_to_dict(string + 4, size - 5, as_class, tz_aware);
    if (!dict) {
        return NULL;
    }
    remainder = PyString_FromStringAndSize(string + size, total_size - size);
    if (!remainder) {
        Py_DECREF(dict);
        return NULL;
    }
    result = Py_BuildValue("OO", dict, remainder);
    Py_DECREF(dict);
    Py_DECREF(remainder);
    return result;
}

static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
    unsigned int size;
    Py_ssize_t total_size;
    const char* string;
    PyObject* bson;
    PyObject* dict;
    PyObject* result;
    PyObject* as_class = (PyObject*)&PyDict_Type;
    unsigned char tz_aware = 1;

    if (!PyArg_ParseTuple(args, "O|Ob", &bson, &as_class, &tz_aware)) {
        return NULL;
    }

    if (!PyString_Check(bson)) {
        PyErr_SetString(PyExc_TypeError, "argument to decode_all must be a string");
        return NULL;
    }
    total_size = PyString_Size(bson);
    string = PyString_AsString(bson);
    if (!string) {
        return NULL;
    }

    result = PyList_New(0);

    while (total_size > 0) {
        if (total_size < 5) {
            PyObject* InvalidBSON = _error("InvalidBSON");
            PyErr_SetString(InvalidBSON,
                            "not enough data for a BSON document");
            Py_DECREF(InvalidBSON);
            return NULL;
        }

        memcpy(&size, string, 4);

        if (total_size < size) {
            PyObject* InvalidBSON = _error("InvalidBSON");
            PyErr_SetString(InvalidBSON,
                            "objsize too large");
            Py_DECREF(InvalidBSON);
            return NULL;
        }

        if (string[size - 1]) {
            PyObject* InvalidBSON = _error("InvalidBSON");
            PyErr_SetString(InvalidBSON,
                            "bad eoo");
            Py_DECREF(InvalidBSON);
            return NULL;
        }

        dict = elements_to_dict(string + 4, size - 5, as_class, tz_aware);
        if (!dict) {
            return NULL;
        }
        PyList_Append(result, dict);
        Py_DECREF(dict);
        string += size;
        total_size -= size;
    }

    return result;
}

static PyMethodDef _CBSONMethods[] = {
    {"_dict_to_bson", _cbson_dict_to_bson, METH_VARARGS,
     "convert a dictionary to a string containing it's BSON representation."},
    {"_bson_to_dict", _cbson_bson_to_dict, METH_VARARGS,
     "convert a BSON string to a SON object."},
    {"decode_all", _cbson_decode_all, METH_VARARGS,
     "convert binary data to a sequence of documents."},
    {NULL, NULL, 0, NULL}
};

PyMODINIT_FUNC init_cbson(void) {
    PyObject *m;

    PyDateTime_IMPORT;
    m = Py_InitModule("_cbson", _CBSONMethods);
    if (m == NULL) {
        return;
    }

    // TODO we don't do any error checking here, should we be?
    _reload_python_objects();
}