Commit 5f3304c5 authored by dpranke@chromium.org

2010-01-29 Dirk Pranke <dpranke@chromium.org>

        Reviewed by Eric Seidel.

        Check in a copy of the simplejson library; it will be used by
        the Chromium port of run-webkit-tests.
        
        https://bugs.webkit.org/show_bug.cgi?id=31498

        * simplejson: Added.
        * simplejson/LICENSE.txt: Added.
        * simplejson/README.txt: Added.
        * simplejson/__init__.py: Added. 
        * simplejson/_speedups.c: Added.
        (ascii_escape_char):
        (ascii_escape_unicode):
        (ascii_escape_str):
        (py_encode_basestring_ascii):
        (init_speedups):
        * simplejson/decoder.py: Added.
        * simplejson/encoder.py: Added.
        * simplejson/jsonfilter.py: Added.
        * simplejson/scanner.py: Added.



git-svn-id: svn://svn.chromium.org/blink/trunk@54087 bbb929c8-8fbe-4397-9dbb-9b2b20218538
parent 85ffb3bd
Copyright (c) 2006 Bob Ippolito

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
URL: http://undefined.org/python/#simplejson
Version: 1.7.3
License: MIT
License File: LICENSE.txt

Description:
simplejson is a JSON encoder and decoder for Python.

Local Modifications:
Removed unit tests from current distribution.
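
For context, the basic use of the library described above is a simple encode/decode round trip. A minimal, hedged illustration follows; the data is made up for the example and is not part of the checked-in files:

import simplejson

# Illustrative data only: serialize to a JSON string, then parse it back.
results = {'tests_run': 2, 'failures': [], 'platform': 'chromium'}
text = simplejson.dumps(results)
parsed = simplejson.loads(text)
# Decoded object keys and string values come back as unicode.
assert parsed == {u'tests_run': 2, u'failures': [], u'platform': u'chromium'}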
#include "Python.h"
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#endif
static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
static PyObject *
ascii_escape_unicode(PyObject *pystr);
static PyObject *
ascii_escape_str(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);
void init_speedups(void);
#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"')
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) {
Py_UNICODE x;
output[chars++] = '\\';
switch (c) {
case '/': output[chars++] = (char)c; break;
case '\\': output[chars++] = (char)c; break;
case '"': output[chars++] = (char)c; break;
case '\b': output[chars++] = 'b'; break;
case '\f': output[chars++] = 'f'; break;
case '\n': output[chars++] = 'n'; break;
case '\r': output[chars++] = 'r'; break;
case '\t': output[chars++] = 't'; break;
default:
#ifdef Py_UNICODE_WIDE
if (c >= 0x10000) {
/* UTF-16 surrogate pair */
Py_UNICODE v = c - 0x10000;
c = 0xd800 | ((v >> 10) & 0x3ff);
output[chars++] = 'u';
x = (c & 0xf000) >> 12;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x0f00) >> 8;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x00f0) >> 4;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x000f);
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
c = 0xdc00 | (v & 0x3ff);
output[chars++] = '\\';
}
#endif
output[chars++] = 'u';
x = (c & 0xf000) >> 12;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x0f00) >> 8;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x00f0) >> 4;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x000f);
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
}
return chars;
}
static PyObject *
ascii_escape_unicode(PyObject *pystr) {
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
Py_ssize_t chars;
PyObject *rval;
char *output;
Py_UNICODE *input_unicode;
input_chars = PyUnicode_GET_SIZE(pystr);
input_unicode = PyUnicode_AS_UNICODE(pystr);
/* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
output = PyString_AS_STRING(rval);
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
Py_UNICODE c = input_unicode[i];
if (S_CHAR(c)) {
output[chars++] = (char)c;
} else {
chars = ascii_escape_char(c, output, chars);
}
if (output_size - chars < (1 + MAX_EXPANSION)) {
/* There's more than four, so let's resize by a lot */
output_size *= 2;
/* This is an upper bound */
if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
output_size = 2 + (input_chars * MAX_EXPANSION);
}
if (_PyString_Resize(&rval, output_size) == -1) {
return NULL;
}
output = PyString_AS_STRING(rval);
}
}
output[chars++] = '"';
if (_PyString_Resize(&rval, chars) == -1) {
return NULL;
}
return rval;
}
static PyObject *
ascii_escape_str(PyObject *pystr) {
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
Py_ssize_t chars;
PyObject *rval;
char *output;
char *input_str;
input_chars = PyString_GET_SIZE(pystr);
input_str = PyString_AS_STRING(pystr);
/* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
output = PyString_AS_STRING(rval);
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
Py_UNICODE c = (Py_UNICODE)input_str[i];
if (S_CHAR(c)) {
output[chars++] = (char)c;
} else if (c > 0x7F) {
/* We hit a non-ASCII character, bail to unicode mode */
PyObject *uni;
Py_DECREF(rval);
uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
if (uni == NULL) {
return NULL;
}
rval = ascii_escape_unicode(uni);
Py_DECREF(uni);
return rval;
} else {
chars = ascii_escape_char(c, output, chars);
}
/* An ASCII char can't possibly expand to a surrogate! */
if (output_size - chars < (1 + MIN_EXPANSION)) {
/* There's more than four, so let's resize by a lot */
output_size *= 2;
if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
output_size = 2 + (input_chars * MIN_EXPANSION);
}
if (_PyString_Resize(&rval, output_size) == -1) {
return NULL;
}
output = PyString_AS_STRING(rval);
}
}
output[chars++] = '"';
if (_PyString_Resize(&rval, chars) == -1) {
return NULL;
}
return rval;
}
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
"encode_basestring_ascii(basestring) -> str\n"
"\n"
"..."
);
static PyObject *
py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) {
/* METH_O */
if (PyString_Check(pystr)) {
return ascii_escape_str(pystr);
} else if (PyUnicode_Check(pystr)) {
return ascii_escape_unicode(pystr);
}
PyErr_SetString(PyExc_TypeError, "first argument must be a string");
return NULL;
}
#define DEFN(n, k) \
{ \
#n, \
(PyCFunction)py_ ##n, \
k, \
pydoc_ ##n \
}
static PyMethodDef speedups_methods[] = {
DEFN(encode_basestring_ascii, METH_O),
{}
};
#undef DEFN
void
init_speedups(void)
{
PyObject *m;
m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);
}
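
The C module above only reimplements encode_basestring_ascii, the hot path of string escaping during encoding. A hedged sketch of how an optional C speedup like this is typically consumed from Python follows; the exact fallback wiring lives in simplejson/encoder.py, which this diff collapses, so treat this as an illustration rather than the checked-in code:

try:
    from simplejson._speedups import encode_basestring_ascii
except ImportError:
    encode_basestring_ascii = None  # extension not built; use the pure-Python path

if encode_basestring_ascii is not None:
    # Accepts str or unicode; returns a double-quoted, ASCII-only JSON literal.
    escaped = encode_basestring_ascii(u'snowman: \u2603')
    # escaped == '"snowman: \\u2603"'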
"""
Implementation of JSONDecoder
"""
import re
from simplejson.scanner import Scanner, pattern
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
def _floatconstants():
import struct
import sys
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
if sys.byteorder != 'big':
_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
nan, inf = struct.unpack('dd', _BYTES)
return nan, inf, -inf
NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
lineno, colno = linecol(doc, pos)
if end is None:
return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
msg, lineno, colno, endlineno, endcolno, pos, end)
_CONSTANTS = {
'-Infinity': NegInf,
'Infinity': PosInf,
'NaN': NaN,
'true': True,
'false': False,
'null': None,
}
def JSONConstant(match, context, c=_CONSTANTS):
return c[match.group(0)], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
def JSONNumber(match, context):
match = JSONNumber.regex.match(match.string, *match.span())
integer, frac, exp = match.groups()
if frac or exp:
res = float(integer + (frac or '') + (exp or ''))
else:
res = int(integer)
return res, None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS)
BACKSLASH = {
'"': u'"', '\\': u'\\', '/': u'/',
'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}
DEFAULT_ENCODING = "utf-8"
def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
if encoding is None:
encoding = DEFAULT_ENCODING
chunks = []
_append = chunks.append
begin = end - 1
while 1:
chunk = _m(s, end)
if chunk is None:
raise ValueError(
errmsg("Unterminated string starting at", s, begin))
end = chunk.end()
content, terminator = chunk.groups()
if content:
if not isinstance(content, unicode):
content = unicode(content, encoding)
_append(content)
if terminator == '"':
break
try:
esc = s[end]
except IndexError:
raise ValueError(
errmsg("Unterminated string starting at", s, begin))
if esc != 'u':
try:
m = _b[esc]
except KeyError:
raise ValueError(
errmsg("Invalid \\escape: %r" % (esc,), s, end))
end += 1
else:
esc = s[end + 1:end + 5]
try:
m = unichr(int(esc, 16))
if len(esc) != 4 or not esc.isalnum():
raise ValueError
except ValueError:
raise ValueError(errmsg("Invalid \\uXXXX escape", s, end))
end += 5
_append(m)
return u''.join(chunks), end
def JSONString(match, context):
encoding = getattr(context, 'encoding', None)
return scanstring(match.string, match.end(), encoding)
pattern(r'"')(JSONString)
WHITESPACE = re.compile(r'\s*', FLAGS)
def JSONObject(match, context, _w=WHITESPACE.match):
pairs = {}
s = match.string
end = _w(s, match.end()).end()
nextchar = s[end:end + 1]
# trivial empty object
if nextchar == '}':
return pairs, end + 1
if nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end))
end += 1
encoding = getattr(context, 'encoding', None)
iterscan = JSONScanner.iterscan
while True:
key, end = scanstring(s, end, encoding)
end = _w(s, end).end()
if s[end:end + 1] != ':':
raise ValueError(errmsg("Expecting : delimiter", s, end))
end = _w(s, end + 1).end()
try:
value, end = iterscan(s, idx=end, context=context).next()
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
pairs[key] = value
end = _w(s, end).end()
nextchar = s[end:end + 1]
end += 1
if nextchar == '}':
break
if nextchar != ',':
raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
end = _w(s, end).end()
nextchar = s[end:end + 1]
end += 1
if nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end - 1))
object_hook = getattr(context, 'object_hook', None)
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end
pattern(r'{')(JSONObject)
def JSONArray(match, context, _w=WHITESPACE.match):
values = []
s = match.string
end = _w(s, match.end()).end()
# look-ahead for trivial empty array
nextchar = s[end:end + 1]
if nextchar == ']':
return values, end + 1
iterscan = JSONScanner.iterscan
while True:
try:
value, end = iterscan(s, idx=end, context=context).next()
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
values.append(value)
end = _w(s, end).end()
nextchar = s[end:end + 1]
end += 1
if nextchar == ']':
break
if nextchar != ',':
raise ValueError(errmsg("Expecting , delimiter", s, end))
end = _w(s, end).end()
return values, end
pattern(r'\[')(JSONArray)
ANYTHING = [
JSONObject,
JSONArray,
JSONString,
JSONConstant,
JSONNumber,
]
JSONScanner = Scanner(ANYTHING)
class JSONDecoder(object):
"""
Simple JSON <http://json.org> decoder
Performs the following translations in decoding:
+---------------+-------------------+
| JSON | Python |
+===============+===================+
| object | dict |
+---------------+-------------------+
| array | list |
+---------------+-------------------+
| string | unicode |
+---------------+-------------------+
| number (int) | int, long |
+---------------+-------------------+
| number (real) | float |
+---------------+-------------------+
| true | True |
+---------------+-------------------+
| false | False |
+---------------+-------------------+
| null | None |
+---------------+-------------------+
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
their corresponding ``float`` values, which is outside the JSON spec.
"""
_scanner = Scanner(ANYTHING)
__all__ = ['__init__', 'decode', 'raw_decode']
def __init__(self, encoding=None, object_hook=None):
"""
``encoding`` determines the encoding used to interpret any ``str``
objects decoded by this instance (utf-8 by default). It has no
effect when decoding ``unicode`` objects.
Note that currently only encodings that are a superset of ASCII work,
strings of other encodings should be passed in as ``unicode``.
``object_hook``, if specified, will be called with the result
of every JSON object decoded and its return value will be used in
place of the given ``dict``. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
"""
self.encoding = encoding
self.object_hook = object_hook
def decode(self, s, _w=WHITESPACE.match):
"""
Return the Python representation of ``s`` (a ``str`` or ``unicode``
instance containing a JSON document)
"""
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
end = _w(s, end).end()
if end != len(s):
raise ValueError(errmsg("Extra data", s, end, len(s)))
return obj
def raw_decode(self, s, **kw):
"""
Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
with a JSON document) and return a 2-tuple of the Python
representation and the index in ``s`` where the document ended.
This can be used to decode a JSON document from a string that may
have extraneous data at the end.
"""
kw.setdefault('context', self)
try:
obj, end = self._scanner.iterscan(s, **kw).next()
except StopIteration:
raise ValueError("No JSON object could be decoded")
return obj, end
__all__ = ['JSONDecoder']
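
A hedged usage sketch of the decoder defined above; the inputs are illustrative values, not part of this commit:

from simplejson.decoder import JSONDecoder

decoder = JSONDecoder()
obj = decoder.decode('{"results": [1, 2.5, null], "ok": true}')
# obj == {u'results': [1, 2.5, None], u'ok': True}

# raw_decode() tolerates trailing data and reports where the document ended.
obj, end = decoder.raw_decode('[true, false] trailing garbage')
# obj == [True, False], end == 13

# object_hook lets callers replace each decoded dict with a custom value.
upper = JSONDecoder(object_hook=lambda d: dict((k.upper(), v) for k, v in d.items()))
# upper.decode('{"a": 1}') == {u'A': 1}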
import simplejson
import cgi

class JSONFilter(object):
    def __init__(self, app, mime_type='text/x-json'):
        self.app = app
        self.mime_type = mime_type

    def __call__(self, environ, start_response):
        # Read JSON POST input to jsonfilter.json if matching mime type
        response = {'status': '200 OK', 'headers': []}
        def json_start_response(status, headers):
            response['status'] = status
            response['headers'].extend(headers)
        environ['jsonfilter.mime_type'] = self.mime_type
        if environ.get('REQUEST_METHOD', '') == 'POST':
            if environ.get('CONTENT_TYPE', '') == self.mime_type:
                args = [_ for _ in [environ.get('CONTENT_LENGTH')] if _]
                data = environ['wsgi.input'].read(*map(int, args))
                environ['jsonfilter.json'] = simplejson.loads(data)
        res = simplejson.dumps(self.app(environ, json_start_response))
        jsonp = cgi.parse_qs(environ.get('QUERY_STRING', '')).get('jsonp')
        if jsonp:
            content_type = 'text/javascript'
            res = ''.join(jsonp + ['(', res, ')'])
        elif 'Opera' in environ.get('HTTP_USER_AGENT', ''):
            # Opera has bunk XMLHttpRequest support for most mime types
            content_type = 'text/plain'
        else:
            content_type = self.mime_type
        headers = [
            ('Content-type', content_type),
            ('Content-length', len(res)),
        ]
        headers.extend(response['headers'])
        start_response(response['status'], headers)
        return [res]

def factory(app, global_conf, **kw):
    return JSONFilter(app, **kw)
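
A hedged sketch of wiring the WSGI filter above around an application; the app below is illustrative and is not part of this commit:

from simplejson.jsonfilter import JSONFilter

def report_app(environ, start_response):
    # JSONFilter serializes whatever the wrapped app returns, so the app can
    # return a plain Python structure instead of an iterable of byte strings.
    start_response('200 OK', [])
    return {'received': environ.get('jsonfilter.json'), 'ok': True}

wrapped = JSONFilter(report_app, mime_type='text/x-json')
# A WSGI server then calls wrapped(environ, start_response): POST bodies with
# Content-Type text/x-json are parsed into environ['jsonfilter.json'], and the
# app's return value is sent back as JSON (or as JSONP when ?jsonp=callback is
# present in the query string).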
"""
Iterator based sre token scanner
"""
import sre_parse, sre_compile, sre_constants
from sre_constants import BRANCH, SUBPATTERN
from re import VERBOSE, MULTILINE, DOTALL
import re
__all__ = ['Scanner', 'pattern']
FLAGS = (VERBOSE | MULTILINE | DOTALL)
class Scanner(object):
def __init__(self, lexicon, flags=FLAGS):
self.actions = [None]
# combine phrases into a compound pattern
s = sre_parse.Pattern()
s.flags = flags
p = []
for idx, token in enumerate(lexicon):
phrase = token.pattern
try:
subpattern = sre_parse.SubPattern(s,
[(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
except sre_constants.error:
raise
p.append(subpattern)
self.actions.append(token)
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
self.scanner = sre_compile.compile(p)
def iterscan(self, string, idx=0, context=None):
"""
Yield match, end_idx for each match
"""
match = self.scanner.scanner(string, idx).match
actions = self.actions
lastend = idx
end = len(string)
while True:
m = match()
if m is None:
break
matchbegin, matchend = m.span()
if lastend == matchend:
break
action = actions[m.lastindex]
if action is not None:
rval, next_pos = action(m, context)
if next_pos is not None and next_pos != matchend:
# "fast forward" the scanner
matchend = next_pos
match = self.scanner.scanner(string, matchend).match
yield rval, matchend
lastend = matchend
def pattern(pattern, flags=FLAGS):
def decorator(fn):
fn.pattern = pattern
fn.regex = re.compile(pattern, flags)
return fn
return decorator
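
A hedged sketch of the Scanner/pattern protocol that decoder.py above relies on: each token function is registered with pattern(), receives (match, context), and returns a (value, optional_fast_forward_index) pair. The token functions here are illustrative, not part of this commit:

from simplejson.scanner import Scanner, pattern

def Number(match, context):
    return int(match.group(0)), None
pattern(r'\d+')(Number)

def Word(match, context):
    return match.group(0), None
pattern(r'[a-z]+')(Word)

tokens = list(Scanner([Number, Word]).iterscan('123abc456'))
# tokens == [(123, 3), ('abc', 6), (456, 9)]; scanning stops at the first
# position where no lexicon pattern matches.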