diff --git a/PKG-INFO b/PKG-INFO
index 9014429356322b9422a2bb7069bc7a5adcc22ebc..5e2fc12b5ff3dbbb98079ee5c453c7693a78e0d1 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,1091 +1,1095 @@
-Metadata-Version: 1.1
-Name: regex
-Version: 2021.10.8
-Summary: Alternative regular expression module, to replace re.
-Home-page: https://bitbucket.org/mrabarnett/mrab-regex
-Author: Matthew Barnett
-Author-email: regex@mrabarnett.plus.com
-License: Apache Software License
-Description: Introduction
-        ------------
-        
-        This regex implementation is backwards-compatible with the standard 're' module, but offers additional functionality.
-        
-        Note
-        ----
-        
-        The re module's behaviour with zero-width matches changed in Python 3.7, and this module will follow that behaviour when compiled for Python 3.7.
-        
-        PyPy
-        ----
-        
-        This module is targeted at CPython. It expects that all codepoints are the same width, so it won't behave properly with PyPy outside U+0000..U+007F because PyPy stores strings as UTF-8.
-        
-        Old vs new behaviour
-        --------------------
-        
-        In order to be compatible with the re module, this module has 2 behaviours:
-        
-        * **Version 0** behaviour (old behaviour, compatible with the re module):
-        
-          Please note that the re module's behaviour may change over time, and I'll endeavour to match that behaviour in version 0.
-        
-          * Indicated by the ``VERSION0`` or ``V0`` flag, or ``(?V0)`` in the pattern.
-        
-          * Zero-width matches are not handled correctly in the re module before Python 3.7. The behaviour in those earlier versions is:
-        
-            * ``.split`` won't split a string at a zero-width match.
-        
-            * ``.sub`` will advance by one character after a zero-width match.
-        
-          * Inline flags apply to the entire pattern, and they can't be turned off.
-        
-          * Only simple sets are supported.
-        
-          * Case-insensitive matches in Unicode use simple case-folding by default.
-        
-        * **Version 1** behaviour (new behaviour, possibly different from the re module):
-        
-          * Indicated by the ``VERSION1`` or ``V1`` flag, or ``(?V1)`` in the pattern.
-        
-          * Zero-width matches are handled correctly.
-        
-          * Inline flags apply to the end of the group or pattern, and they can be turned off.
-        
-          * Nested sets and set operations are supported.
-        
-          * Case-insensitive matches in Unicode use full case-folding by default.
-        
-        If no version is specified, the regex module will default to ``regex.DEFAULT_VERSION``.
-        
-        Case-insensitive matches in Unicode
-        -----------------------------------
-        
-        The regex module supports both simple and full case-folding for case-insensitive matches in Unicode. Use of full case-folding can be turned on using the ``FULLCASE`` or ``F`` flag, or ``(?f)`` in the pattern. Please note that this flag affects how the ``IGNORECASE`` flag works; the ``FULLCASE`` flag itself does not turn on case-insensitive matching.
-        
-        In the version 0 behaviour, the flag is off by default.
-        
-        In the version 1 behaviour, the flag is on by default.
-        
-        Nested sets and set operations
-        ------------------------------
-        
-        It's not possible to support both simple sets, as used in the re module, and nested sets at the same time because of a difference in the meaning of an unescaped ``"["`` in a set.
-        
-        For example, the pattern ``[[a-z]--[aeiou]]`` is treated in the version 0 behaviour (simple sets, compatible with the re module) as:
-        
-        * Set containing "[" and the letters "a" to "z"
-        
-        * Literal "--"
-        
-        * Set containing letters "a", "e", "i", "o", "u"
-        
-        * Literal "]"
-        
-        but in the version 1 behaviour (nested sets, enhanced behaviour) as:
-        
-        * Set which is:
-        
-          * Set containing the letters "a" to "z"
-        
-        * but excluding:
-        
-          * Set containing the letters "a", "e", "i", "o", "u"
-        
-        Version 0 behaviour: only simple sets are supported.
-        
-        Version 1 behaviour: nested sets and set operations are supported.
-        
-        Flags
-        -----
-        
-        There are 2 kinds of flag: scoped and global. Scoped flags can apply to only part of a pattern and can be turned on or off; global flags apply to the entire pattern and can only be turned on.
-        
-        The scoped flags are: ``FULLCASE``, ``IGNORECASE``, ``MULTILINE``, ``DOTALL``, ``VERBOSE``, ``WORD``.
-        
-        The global flags are: ``ASCII``, ``BESTMATCH``, ``ENHANCEMATCH``, ``LOCALE``, ``POSIX``, ``REVERSE``, ``UNICODE``, ``VERSION0``, ``VERSION1``.
-        
-        If neither the ``ASCII``, ``LOCALE`` nor ``UNICODE`` flag is specified, it will default to ``UNICODE`` if the regex pattern is a Unicode string and ``ASCII`` if it's a bytestring.
-        
-        The ``ENHANCEMATCH`` flag makes fuzzy matching attempt to improve the fit of the next match that it finds.
-        
-        The ``BESTMATCH`` flag makes fuzzy matching search for the best match instead of the next match.
-        
-        Notes on named capture groups
-        -----------------------------
-        
-        All capture groups have a group number, starting from 1.
-        
-        Groups with the same group name will have the same group number, and groups with a different group name will have a different group number.
-        
-        The same name can be used by more than one group, with later captures 'overwriting' earlier captures. All of the captures of the group will be available from the ``captures`` method of the match object.
-        
-        Group numbers will be reused across different branches of a branch reset, eg. ``(?|(first)|(second))`` has only group 1. If capture groups have different group names then they will, of course, have different group numbers, eg. ``(?|(?P<foo>first)|(?P<bar>second))`` has group 1 ("foo") and group 2 ("bar").
-        
-        In the regex ``(\s+)(?|(?P<foo>[A-Z]+)|(\w+) (?P<foo>[0-9]+))`` there are 2 groups:
-        
-        * ``(\s+)`` is group 1.
-        
-        * ``(?P<foo>[A-Z]+)`` is group 2, also called "foo".
-        
-        * ``(\w+)`` is group 2 because of the branch reset.
-        
-        * ``(?P<foo>[0-9]+)`` is group 2 because it's called "foo".
-        
-        If you want to prevent ``(\w+)`` from being group 2, you need to name it (different name, different group number).
-        
-        Multithreading
-        --------------
-        
-        The regex module releases the GIL during matching on instances of the built-in (immutable) string classes, enabling other Python threads to run concurrently. It is also possible to force the regex module to release the GIL during matching by calling the matching methods with the keyword argument ``concurrent=True``. The behaviour is undefined if the string changes during matching, so use it *only* when it is guaranteed that that won't happen.
-        
-        Unicode
-        -------
-        
-        This module supports Unicode 14.0.0.
-        
-        Full Unicode case-folding is supported.
-        
-        Additional features
-        -------------------
-        
-        The issue numbers relate to the Python bug tracker, except where listed as "Hg issue".
-        
-        Added support for lookaround in conditional pattern (`Hg issue 163 <https://bitbucket.org/mrabarnett/mrab-regex/issues/163>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The test of a conditional pattern can now be a lookaround.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.match(r'(?(?=\d)\d+|\w+)', '123abc')
-          <regex.Match object; span=(0, 3), match='123'>
-          >>> regex.match(r'(?(?=\d)\d+|\w+)', 'abc123')
-          <regex.Match object; span=(0, 6), match='abc123'>
-        
-        This is not quite the same as putting a lookaround in the first branch of a pair of alternatives.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> print(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc'))
-          <regex.Match object; span=(0, 6), match='123abc'>
-          >>> print(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'))
-          None
-        
-        In the first example, the lookaround matched, but the remainder of the first branch failed to match, and so the second branch was attempted, whereas in the second example, the lookaround matched, and the first branch failed to match, but the second branch was **not** attempted.
-        
-        Added POSIX matching (leftmost longest) (`Hg issue 150 <https://bitbucket.org/mrabarnett/mrab-regex/issues/150>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The POSIX standard for regex is to return the leftmost longest match. This can be turned on using the ``POSIX`` flag (``(?p)``).
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> # Normal matching.
-          >>> regex.search(r'Mr|Mrs', 'Mrs')
-          <regex.Match object; span=(0, 2), match='Mr'>
-          >>> regex.search(r'one(self)?(selfsufficient)?', 'oneselfsufficient')
-          <regex.Match object; span=(0, 7), match='oneself'>
-          >>> # POSIX matching.
-          >>> regex.search(r'(?p)Mr|Mrs', 'Mrs')
-          <regex.Match object; span=(0, 3), match='Mrs'>
-          >>> regex.search(r'(?p)one(self)?(selfsufficient)?', 'oneselfsufficient')
-          <regex.Match object; span=(0, 17), match='oneselfsufficient'>
-        
-        Note that it will take longer to find matches because when it finds a match at a certain position, it won't return that immediately, but will keep looking to see if there's another longer match there.
-        
-        Added ``(?(DEFINE)...)`` (`Hg issue 152 <https://bitbucket.org/mrabarnett/mrab-regex/issues/152>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        If there's no group called "DEFINE", then ... will be ignored, but any group definitions within it will be available.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.search(r'(?(DEFINE)(?P<quant>\d+)(?P<item>\w+))(?&quant) (?&item)', '5 elephants')
-          <regex.Match object; span=(0, 11), match='5 elephants'>
-        
-        Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://bitbucket.org/mrabarnett/mrab-regex/issues/153>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``(*PRUNE)`` discards the backtracking info up to that point. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
-        
-        ``(*SKIP)`` is similar to ``(*PRUNE)``, except that it also sets where in the text the next attempt to match will start. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
-        
-        ``(*FAIL)`` causes immediate backtracking. ``(*F)`` is a permitted abbreviation.
-        
-        Added ``\K`` (`Hg issue 151 <https://bitbucket.org/mrabarnett/mrab-regex/issues/151>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Keeps the part of the entire match after the position where ``\K`` occurred; the part before it is discarded.
-        
-        It does not affect what capture groups return.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search(r'(\w\w\K\w\w\w)', 'abcdef')
-          >>> m[0]
-          'cde'
-          >>> m[1]
-          'abcde'
-          >>>
-          >>> m = regex.search(r'(?r)(\w\w\K\w\w\w)', 'abcdef')
-          >>> m[0]
-          'bc'
-          >>> m[1]
-          'bcdef'
-        
-        Added capture subscripting for ``expandf`` and ``subf``/``subfn`` (`Hg issue 133 <https://bitbucket.org/mrabarnett/mrab-regex/issues/133>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        You can now use subscripting to get the captures of a repeated capture group.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.match(r"(\w)+", "abc")
-          >>> m.expandf("{1}")
-          'c'
-          >>> m.expandf("{1[0]} {1[1]} {1[2]}")
-          'a b c'
-          >>> m.expandf("{1[-1]} {1[-2]} {1[-3]}")
-          'c b a'
-          >>>
-          >>> m = regex.match(r"(?P<letter>\w)+", "abc")
-          >>> m.expandf("{letter}")
-          'c'
-          >>> m.expandf("{letter[0]} {letter[1]} {letter[2]}")
-          'a b c'
-          >>> m.expandf("{letter[-1]} {letter[-2]} {letter[-3]}")
-          'c b a'
-        
-        Added support for referring to a group by number using ``(?P=...)``.
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        This is in addition to the existing ``\g<...>``.
-        
-        Fixed the handling of locale-sensitive regexes.
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The ``LOCALE`` flag is intended for legacy code and has limited support. You're still recommended to use Unicode instead.
-        
-        Added partial matches (`Hg issue 102 <https://bitbucket.org/mrabarnett/mrab-regex/issues/102>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        A partial match is one that matches up to the end of string, but that string has been truncated and you want to know whether a complete match could be possible if the string had not been truncated.
-        
-        Partial matches are supported by ``match``, ``search``, ``fullmatch`` and ``finditer`` with the ``partial`` keyword argument.
-        
-        Match objects have a ``partial`` attribute, which is ``True`` if it's a partial match.
-        
-        For example, if you wanted a user to enter a 4-digit number and check it character by character as it was being entered:
-        
-        .. sourcecode:: python
-        
-          >>> pattern = regex.compile(r'\d{4}')
-        
-          >>> # Initially, nothing has been entered:
-          >>> print(pattern.fullmatch('', partial=True))
-          <regex.Match object; span=(0, 0), match='', partial=True>
-        
-          >>> # An empty string is OK, but it's only a partial match.
-          >>> # The user enters a letter:
-          >>> print(pattern.fullmatch('a', partial=True))
-          None
-          >>> # It'll never match.
-        
-          >>> # The user deletes that and enters a digit:
-          >>> print(pattern.fullmatch('1', partial=True))
-          <regex.Match object; span=(0, 1), match='1', partial=True>
-          >>> # It matches this far, but it's only a partial match.
-        
-          >>> # The user enters 2 more digits:
-          >>> print(pattern.fullmatch('123', partial=True))
-          <regex.Match object; span=(0, 3), match='123', partial=True>
-          >>> # It matches this far, but it's only a partial match.
-        
-          >>> # The user enters another digit:
-          >>> print(pattern.fullmatch('1234', partial=True))
-          <regex.Match object; span=(0, 4), match='1234'>
-          >>> # It's a complete match.
-        
-          >>> # If the user enters another digit:
-          >>> print(pattern.fullmatch('12345', partial=True))
-          None
-          >>> # It's no longer a match.
-        
-          >>> # This is a partial match:
-          >>> pattern.match('123', partial=True).partial
-          True
-        
-          >>> # This is a complete match:
-          >>> pattern.match('1233', partial=True).partial
-          False
-        
-        ``*`` operator not working correctly with sub() (`Hg issue 106 <https://bitbucket.org/mrabarnett/mrab-regex/issues/106>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Sometimes it's not clear how zero-width matches should be handled. For example, should ``.*`` match 0 characters directly after matching >0 characters?
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          # Python 3.7 and later
-          >>> regex.sub('.*', 'x', 'test')
-          'xx'
-          >>> regex.sub('.*?', '|', 'test')
-          '|||||||||'
-        
-          # Python 3.6 and earlier
-          >>> regex.sub('(?V0).*', 'x', 'test')
-          'x'
-          >>> regex.sub('(?V1).*', 'x', 'test')
-          'xx'
-          >>> regex.sub('(?V0).*?', '|', 'test')
-          '|t|e|s|t|'
-          >>> regex.sub('(?V1).*?', '|', 'test')
-          '|||||||||'
-        
-        Added ``capturesdict`` (`Hg issue 86 <https://bitbucket.org/mrabarnett/mrab-regex/issues/86>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``capturesdict`` is a combination of ``groupdict`` and ``captures``:
-        
-        ``groupdict`` returns a dict of the named groups and the last capture of those groups.
-        
-        ``captures`` returns a list of all the captures of a group.
-        
-        ``capturesdict`` returns a dict of the named groups and lists of all the captures of those groups.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.match(r"(?:(?P<word>\w+) (?P<digits>\d+)\n)+", "one 1\ntwo 2\nthree 3\n")
-          >>> m.groupdict()
-          {'word': 'three', 'digits': '3'}
-          >>> m.captures("word")
-          ['one', 'two', 'three']
-          >>> m.captures("digits")
-          ['1', '2', '3']
-          >>> m.capturesdict()
-          {'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}
-        
-        Allow duplicate names of groups (`Hg issue 87 <https://bitbucket.org/mrabarnett/mrab-regex/issues/87>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Group names can now be duplicated.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> # With optional groups:
-          >>>
-          >>> # Both groups capture, the second capture 'overwriting' the first.
-          >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or second")
-          >>> m.group("item")
-          'second'
-          >>> m.captures("item")
-          ['first', 'second']
-          >>> # Only the second group captures.
-          >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", " or second")
-          >>> m.group("item")
-          'second'
-          >>> m.captures("item")
-          ['second']
-          >>> # Only the first group captures.
-          >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or ")
-          >>> m.group("item")
-          'first'
-          >>> m.captures("item")
-          ['first']
-          >>>
-          >>> # With mandatory groups:
-          >>>
-          >>> # Both groups capture, the second capture 'overwriting' the first.
-          >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)?", "first or second")
-          >>> m.group("item")
-          'second'
-          >>> m.captures("item")
-          ['first', 'second']
-          >>> # Again, both groups capture, the second capture 'overwriting' the first.
-          >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", " or second")
-          >>> m.group("item")
-          'second'
-          >>> m.captures("item")
-          ['', 'second']
-          >>> # And yet again, both groups capture, the second capture 'overwriting' the first.
-          >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or ")
-          >>> m.group("item")
-          ''
-          >>> m.captures("item")
-          ['first', '']
-        
-        Added ``fullmatch`` (`issue #16203 <https://bugs.python.org/issue16203>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``fullmatch`` behaves like ``match``, except that it must match all of the string.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> print(regex.fullmatch(r"abc", "abc").span())
-          (0, 3)
-          >>> print(regex.fullmatch(r"abc", "abcx"))
-          None
-          >>> print(regex.fullmatch(r"abc", "abcx", endpos=3).span())
-          (0, 3)
-          >>> print(regex.fullmatch(r"abc", "xabcy", pos=1, endpos=4).span())
-          (1, 4)
-          >>>
-          >>> regex.match(r"a.*?", "abcd").group(0)
-          'a'
-          >>> regex.fullmatch(r"a.*?", "abcd").group(0)
-          'abcd'
-        
-        Added ``subf`` and ``subfn``
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``subf`` and ``subfn`` are alternatives to ``sub`` and ``subn`` respectively. When passed a replacement string, they treat it as a format string.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", "foo bar")
-          'foo bar => bar foo'
-          >>> regex.subf(r"(?P<word1>\w+) (?P<word2>\w+)", "{word2} {word1}", "foo bar")
-          'bar foo'
-        
-        Added ``expandf`` to match object
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``expandf`` is an alternative to ``expand``. When passed a replacement string, it treats it as a format string.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.match(r"(\w+) (\w+)", "foo bar")
-          >>> m.expandf("{0} => {2} {1}")
-          'foo bar => bar foo'
-          >>>
-          >>> m = regex.match(r"(?P<word1>\w+) (?P<word2>\w+)", "foo bar")
-          >>> m.expandf("{word2} {word1}")
-          'bar foo'
-        
-        Detach searched string
-        ^^^^^^^^^^^^^^^^^^^^^^
-        
-        A match object contains a reference to the string that was searched, via its ``string`` attribute. The ``detach_string`` method will 'detach' that string, making it available for garbage collection, which might save valuable memory if that string is very large.
-        
-        Example:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search(r"\w+", "Hello world")
-          >>> print(m.group())
-          Hello
-          >>> print(m.string)
-          Hello world
-          >>> m.detach_string()
-          >>> print(m.group())
-          Hello
-          >>> print(m.string)
-          None
-        
-        Recursive patterns (`Hg issue 27 <https://bitbucket.org/mrabarnett/mrab-regex/issues/27>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Recursive and repeated patterns are supported.
-        
-        ``(?R)`` or ``(?0)`` tries to match the entire regex recursively. ``(?1)``, ``(?2)``, etc, try to match the relevant capture group.
-        
-        ``(?&name)`` tries to match the named capture group.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.match(r"(Tarzan|Jane) loves (?1)", "Tarzan loves Jane").groups()
-          ('Tarzan',)
-          >>> regex.match(r"(Tarzan|Jane) loves (?1)", "Jane loves Tarzan").groups()
-          ('Jane',)
-        
-          >>> m = regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")
-          >>> m.group(0, 1, 2)
-          ('kayak', 'k', None)
-        
-        The first two examples show how the subpattern within the capture group is reused, but is _not_ itself a capture group. In other words, ``"(Tarzan|Jane) loves (?1)"`` is equivalent to ``"(Tarzan|Jane) loves (?:Tarzan|Jane)"``.
-        
-        It's possible to backtrack into a recursed or repeated group.
-        
-        You can't call a group if there is more than one group with that group name or group number (``"ambiguous group reference"``).
-        
-        The alternative forms ``(?P>name)`` and ``(?P&name)`` are also supported.
-        
-        Full Unicode case-folding is supported.
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        In version 1 behaviour, the regex module uses full case-folding when performing case-insensitive matches in Unicode.
-        
-        Examples (in Python 3):
-        
-        .. sourcecode:: python
-        
-          >>> regex.match(r"(?iV1)strasse", "stra\N{LATIN SMALL LETTER SHARP S}e").span()
-          (0, 6)
-          >>> regex.match(r"(?iV1)stra\N{LATIN SMALL LETTER SHARP S}e", "STRASSE").span()
-          (0, 7)
-        
-        In version 0 behaviour, it uses simple case-folding for backward compatibility with the re module.
-        
-        Approximate "fuzzy" matching (`Hg issue 12 <https://bitbucket.org/mrabarnett/mrab-regex/issues/12>`_, `Hg issue 41 <https://bitbucket.org/mrabarnett/mrab-regex/issues/41>`_, `Hg issue 109 <https://bitbucket.org/mrabarnett/mrab-regex/issues/109>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Regex usually attempts an exact match, but sometimes an approximate, or "fuzzy", match is needed, for those cases where the text being searched may contain errors in the form of inserted, deleted or substituted characters.
-        
-        A fuzzy regex specifies which types of errors are permitted, and, optionally, either the minimum and maximum or only the maximum permitted number of each type. (You cannot specify only a minimum.)
-        
-        The 3 types of error are:
-        
-        * Insertion, indicated by "i"
-        
-        * Deletion, indicated by "d"
-        
-        * Substitution, indicated by "s"
-        
-        In addition, "e" indicates any type of error.
-        
-        The fuzziness of a regex item is specified between "{" and "}" after the item.
-        
-        Examples:
-        
-        * ``foo`` match "foo" exactly
-        
-        * ``(?:foo){i}`` match "foo", permitting insertions
-        
-        * ``(?:foo){d}`` match "foo", permitting deletions
-        
-        * ``(?:foo){s}`` match "foo", permitting substitutions
-        
-        * ``(?:foo){i,s}`` match "foo", permitting insertions and substitutions
-        
-        * ``(?:foo){e}`` match "foo", permitting errors
-        
-        If a certain type of error is specified, then any type not specified will **not** be permitted.
-        
-        In the following examples I'll omit the item and write only the fuzziness:
-        
-        * ``{d<=3}`` permit at most 3 deletions, but no other types
-        
-        * ``{i<=1,s<=2}`` permit at most 1 insertion and at most 2 substitutions, but no deletions
-        
-        * ``{1<=e<=3}`` permit at least 1 and at most 3 errors
-        
-        * ``{i<=2,d<=2,e<=3}`` permit at most 2 insertions, at most 2 deletions, at most 3 errors in total, but no substitutions
-        
-        It's also possible to state the costs of each type of error and the maximum permitted total cost.
-        
-        Examples:
-        
-        * ``{2i+2d+1s<=4}`` each insertion costs 2, each deletion costs 2, each substitution costs 1, the total cost must not exceed 4
-        
-        * ``{i<=1,d<=1,s<=1,2i+2d+1s<=4}`` at most 1 insertion, at most 1 deletion, at most 1 substitution; each insertion costs 2, each deletion costs 2, each substitution costs 1, the total cost must not exceed 4
-        
-        You can also use "<" instead of "<=" if you want an exclusive minimum or maximum.
-        
-        You can add a test to perform on a character that's substituted or inserted.
-        
-        Examples:
-        
-        * ``{s<=2:[a-z]}`` at most 2 substitutions, which must be in the character set ``[a-z]``.
-        
-        * ``{s<=2,i<=3:\d}`` at most 2 substitutions, at most 3 insertions, which must be digits.
-        
-        By default, fuzzy matching searches for the first match that meets the given constraints. The ``ENHANCEMATCH`` flag will cause it to attempt to improve the fit (i.e. reduce the number of errors) of the match that it has found.
-        
-        The ``BESTMATCH`` flag will make it search for the best match instead.
-        
-        Further examples to note:
-        
-        * ``regex.search("(dog){e}", "cat and dog")[1]`` returns ``"cat"`` because that matches ``"dog"`` with 3 errors (an unlimited number of errors is permitted).
-        
-        * ``regex.search("(dog){e<=1}", "cat and dog")[1]`` returns ``" dog"`` (with a leading space) because that matches ``"dog"`` with 1 error, which is within the limit.
-        
-        * ``regex.search("(?e)(dog){e<=1}", "cat and dog")[1]`` returns ``"dog"`` (without a leading space) because the fuzzy search matches ``" dog"`` with 1 error, which is within the limit, and the ``(?e)`` then makes it attempt a better fit.
-        
-        In the first two examples there are perfect matches later in the string, but in neither case is it the first possible match.
-        
-        The match object has an attribute ``fuzzy_counts`` which gives the total number of substitutions, insertions and deletions.
-        
-        .. sourcecode:: python
-        
-          >>> # A 'raw' fuzzy match:
-          >>> regex.fullmatch(r"(?:cats|cat){e<=1}", "cat").fuzzy_counts
-          (0, 0, 1)
-          >>> # 0 substitutions, 0 insertions, 1 deletion.
-        
-          >>> # A better match might be possible if the ENHANCEMATCH flag is used:
-          >>> regex.fullmatch(r"(?e)(?:cats|cat){e<=1}", "cat").fuzzy_counts
-          (0, 0, 0)
-          >>> # 0 substitutions, 0 insertions, 0 deletions.
-        
-        The match object also has an attribute ``fuzzy_changes`` which gives a tuple of the positions of the substitutions, insertions and deletions.
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search('(fuu){i<=2,d<=2,e<=5}', 'anaconda foo bar')
-          >>> m
-          <regex.Match object; span=(7, 10), match='a f', fuzzy_counts=(0, 2, 2)>
-          >>> m.fuzzy_changes
-          ([], [7, 8], [10, 11])
-        
-        What this means is that if the matched part of the string had been:
-        
-        .. sourcecode:: python
-        
-          'anacondfuuoo bar'
-        
-        it would've been an exact match.
-        
-        However, there were insertions at positions 7 and 8:
-        
-        .. sourcecode:: python
-        
-          'anaconda fuuoo bar'
-                  ^^
-        
-        and deletions at positions 10 and 11:
-        
-        .. sourcecode:: python
-        
-          'anaconda f~~oo bar'
-                     ^^
-        
-        So the actual string was:
-        
-        .. sourcecode:: python
-        
-          'anaconda foo bar'
-        
-        Named lists (`Hg issue 11 <https://bitbucket.org/mrabarnett/mrab-regex/issues/11>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``\L<name>``
-        
-        There are occasions where you may want to include a list (actually, a set) of options in a regex.
-        
-        One way is to build the pattern like this:
-        
-        .. sourcecode:: python
-        
-          >>> p = regex.compile(r"first|second|third|fourth|fifth")
-        
-        but if the list is large, parsing the resulting regex can take considerable time, and care must also be taken that the strings are properly escaped and properly ordered, for example, "cats" before "cat".
-        
-        The new alternative is to use a named list:
-        
-        .. sourcecode:: python
-        
-          >>> option_set = ["first", "second", "third", "fourth", "fifth"]
-          >>> p = regex.compile(r"\L<options>", options=option_set)
-        
-        The order of the items is irrelevant, they are treated as a set. The named lists are available as the ``.named_lists`` attribute of the pattern object :
-        
-        .. sourcecode:: python
-        
-          >>> print(p.named_lists)
-          # Python 3
-          {'options': frozenset({'fifth', 'first', 'fourth', 'second', 'third'})}
-          # Python 2
-          {'options': frozenset(['fifth', 'fourth', 'second', 'third', 'first'])}
-        
-        If there are any unused keyword arguments, ``ValueError`` will be raised unless you tell it otherwise:
-        
-        .. sourcecode:: python
-        
-          >>> option_set = ["first", "second", "third", "fourth", "fifth"]
-          >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[])
-          Traceback (most recent call last):
-            File "<stdin>", line 1, in <module>
-            File "C:\Python37\lib\site-packages\regex\regex.py", line 348, in compile
-              return _compile(pattern, flags, ignore_unused, kwargs)
-            File "C:\Python37\lib\site-packages\regex\regex.py", line 585, in _compile
-              raise ValueError('unused keyword argument {!a}'.format(any_one))
-          ValueError: unused keyword argument 'other_options'
-          >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=True)
-          >>>
-        
-        Start and end of word
-        ^^^^^^^^^^^^^^^^^^^^^
-        
-        ``\m`` matches at the start of a word.
-        
-        ``\M`` matches at the end of a word.
-        
-        Compare with ``\b``, which matches at the start or end of a word.
-        
-        Unicode line separators
-        ^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Normally the only line separator is ``\n`` (``\x0A``), but if the ``WORD`` flag is turned on then the line separators are ``\x0D\x0A``, ``\x0A``, ``\x0B``, ``\x0C`` and ``\x0D``, plus ``\x85``, ``\u2028`` and ``\u2029`` when working with Unicode.
-        
-        This affects the regex dot ``"."``, which, with the ``DOTALL`` flag turned off, matches any character except a line separator. It also affects the line anchors ``^`` and ``$`` (in multiline mode).
-        
-        Set operators
-        ^^^^^^^^^^^^^
-        
-        **Version 1 behaviour only**
-        
-        Set operators have been added, and a set ``[...]`` can include nested sets.
-        
-        The operators, in order of increasing precedence, are:
-        
-        * ``||`` for union ("x||y" means "x or y")
-        
-        * ``~~`` (double tilde) for symmetric difference ("x~~y" means "x or y, but not both")
-        
-        * ``&&`` for intersection ("x&&y" means "x and y")
-        
-        * ``--`` (double dash) for difference ("x--y" means "x but not y")
-        
-        Implicit union, ie, simple juxtaposition like in ``[ab]``, has the highest precedence. Thus, ``[ab&&cd]`` is the same as ``[[a||b]&&[c||d]]``.
-        
-        Examples:
-        
-        * ``[ab]`` # Set containing 'a' and 'b'
-        
-        * ``[a-z]`` # Set containing 'a' .. 'z'
-        
-        * ``[[a-z]--[qw]]`` # Set containing 'a' .. 'z', but not 'q' or 'w'
-        
-        * ``[a-z--qw]`` # Same as above
-        
-        * ``[\p{L}--QW]`` # Set containing all letters except 'Q' and 'W'
-        
-        * ``[\p{N}--[0-9]]`` # Set containing all numbers except '0' .. '9'
-        
-        * ``[\p{ASCII}&&\p{Letter}]`` # Set containing all characters which are ASCII and letter
-        
-        regex.escape (`issue #2650 <https://bugs.python.org/issue2650>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        regex.escape has an additional keyword parameter ``special_only``. When True, only 'special' regex characters, such as '?', are escaped.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.escape("foo!?", special_only=False)
-          'foo\\!\\?'
-          >>> regex.escape("foo!?", special_only=True)
-          'foo!\\?'
-        
-        regex.escape (`Hg issue 249 <https://bitbucket.org/mrabarnett/mrab-regex/issues/249>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        regex.escape has an additional keyword parameter ``literal_spaces``. When True, spaces are not escaped.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.escape("foo bar!?", literal_spaces=False)
-          'foo\\ bar!\\?'
-          >>> regex.escape("foo bar!?", literal_spaces=True)
-          'foo bar!\\?'
-        
-        Repeated captures (`issue #7132 <https://bugs.python.org/issue7132>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        A match object has additional methods which return information on all the successful matches of a repeated capture group. These methods are:
-        
-        * ``matchobject.captures([group1, ...])``
-        
-          * Returns a list of the strings matched in a group or groups. Compare with ``matchobject.group([group1, ...])``.
-        
-        * ``matchobject.starts([group])``
-        
-          * Returns a list of the start positions. Compare with ``matchobject.start([group])``.
-        
-        * ``matchobject.ends([group])``
-        
-          * Returns a list of the end positions. Compare with ``matchobject.end([group])``.
-        
-        * ``matchobject.spans([group])``
-        
-          * Returns a list of the spans. Compare with ``matchobject.span([group])``.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search(r"(\w{3})+", "123456789")
-          >>> m.group(1)
-          '789'
-          >>> m.captures(1)
-          ['123', '456', '789']
-          >>> m.start(1)
-          6
-          >>> m.starts(1)
-          [0, 3, 6]
-          >>> m.end(1)
-          9
-          >>> m.ends(1)
-          [3, 6, 9]
-          >>> m.span(1)
-          (6, 9)
-          >>> m.spans(1)
-          [(0, 3), (3, 6), (6, 9)]
-        
-        Atomic grouping (`issue #433030 <https://bugs.python.org/issue433030>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``(?>...)``
-        
-        If the following pattern subsequently fails, then the subpattern as a whole will fail.
-        
-        Possessive quantifiers.
-        ^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``(?:...)?+`` ; ``(?:...)*+`` ; ``(?:...)++`` ; ``(?:...){min,max}+``
-        
-        The subpattern is matched up to 'max' times. If the following pattern subsequently fails, then all of the repeated subpatterns will fail as a whole. For example, ``(?:...)++`` is equivalent to ``(?>(?:...)+)``.
-        
-        Scoped flags (`issue #433028 <https://bugs.python.org/issue433028>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``(?flags-flags:...)``
-        
-        The flags will apply only to the subpattern. Flags can be turned on or off.
-        
-        Definition of 'word' character (`issue #1693050 <https://bugs.python.org/issue1693050>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The definition of a 'word' character has been expanded for Unicode. It now conforms to the Unicode specification at ``http://www.unicode.org/reports/tr29/``.
-        
-        Variable-length lookbehind
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        A lookbehind can match a variable-length string.
-        
-        Flags argument for regex.split, regex.sub and regex.subn (`issue #3482 <https://bugs.python.org/issue3482>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``regex.split``, ``regex.sub`` and ``regex.subn`` support a 'flags' argument.
-        
-        Pos and endpos arguments for regex.sub and regex.subn
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``regex.sub`` and ``regex.subn`` support 'pos' and 'endpos' arguments.
-        
-        'Overlapped' argument for regex.findall and regex.finditer
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``regex.findall`` and ``regex.finditer`` support an 'overlapped' flag which permits overlapped matches.
-        
-        Splititer
-        ^^^^^^^^^
-        
-        ``regex.splititer`` has been added. It's a generator equivalent of ``regex.split``.
-        
-        Subscripting for groups
-        ^^^^^^^^^^^^^^^^^^^^^^^
-        
-        A match object accepts access to the captured groups via subscripting and slicing:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search(r"(?P<before>.*?)(?P<num>\d+)(?P<after>.*)", "pqr123stu")
-          >>> print(m["before"])
-          pqr
-          >>> print(len(m))
-          4
-          >>> print(m[:])
-          ('pqr123stu', 'pqr', '123', 'stu')
-        
-        Named groups
-        ^^^^^^^^^^^^
-        
-        Groups can be named with ``(?<name>...)`` as well as the current ``(?P<name>...)``.
-        
-        Group references
-        ^^^^^^^^^^^^^^^^
-        
-        Groups can be referenced within a pattern with ``\g<name>``. This also allows there to be more than 99 groups.
-        
-        Named characters
-        ^^^^^^^^^^^^^^^^
-        
-        ``\N{name}``
-        
-        Named characters are supported. (Note: only those known by Python's Unicode database are supported.)
-        
-        Unicode codepoint properties, including scripts and blocks
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``\p{property=value}``; ``\P{property=value}``; ``\p{value}`` ; ``\P{value}``
-        
-        Many Unicode properties are supported, including blocks and scripts. ``\p{property=value}`` or ``\p{property:value}`` matches a character whose property ``property`` has value ``value``. The inverse of ``\p{property=value}`` is ``\P{property=value}`` or ``\p{^property=value}``.
-        
-        If the short form ``\p{value}`` is used, the properties are checked in the order: ``General_Category``, ``Script``, ``Block``, binary property:
-        
-        * ``Latin``, the 'Latin' script (``Script=Latin``).
-        
-        * ``BasicLatin``, the 'BasicLatin' block (``Block=BasicLatin``).
-        
-        * ``Alphabetic``, the 'Alphabetic' binary property (``Alphabetic=Yes``).
-        
-        A short form starting with ``Is`` indicates a script or binary property:
-        
-        * ``IsLatin``, the 'Latin' script (``Script=Latin``).
-        
-        * ``IsAlphabetic``, the 'Alphabetic' binary property (``Alphabetic=Yes``).
-        
-        A short form starting with ``In`` indicates a block property:
-        
-        * ``InBasicLatin``, the 'BasicLatin' block (``Block=BasicLatin``).
-        
-        POSIX character classes
-        ^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``[[:alpha:]]``; ``[[:^alpha:]]``
-        
-        POSIX character classes are supported. These are normally treated as an alternative form of ``\p{...}``.
-        
-        The exceptions are ``alnum``, ``digit``, ``punct`` and ``xdigit``, whose definitions are different from those of Unicode.
-        
-        ``[[:alnum:]]`` is equivalent to ``\p{posix_alnum}``.
-        
-        ``[[:digit:]]`` is equivalent to ``\p{posix_digit}``.
-        
-        ``[[:punct:]]`` is equivalent to ``\p{posix_punct}``.
-        
-        ``[[:xdigit:]]`` is equivalent to ``\p{posix_xdigit}``.
-        
-        Search anchor
-        ^^^^^^^^^^^^^
-        
-        ``\G``
-        
-        A search anchor has been added. It matches at the position where each search started/continued and can be used for contiguous matches or in negative variable-length lookbehinds to limit how far back the lookbehind goes:
-        
-        .. sourcecode:: python
-        
-          >>> regex.findall(r"\w{2}", "abcd ef")
-          ['ab', 'cd', 'ef']
-          >>> regex.findall(r"\G\w{2}", "abcd ef")
-          ['ab', 'cd']
-        
-        * The search starts at position 0 and matches 2 letters 'ab'.
-        
-        * The search continues at position 2 and matches 2 letters 'cd'.
-        
-        * The search continues at position 4 and fails to match any letters.
-        
-        * The anchor stops the search start position from being advanced, so there are no more results.
-        
-        Reverse searching
-        ^^^^^^^^^^^^^^^^^
-        
-        Searches can now work backwards:
-        
-        .. sourcecode:: python
-        
-          >>> regex.findall(r".", "abc")
-          ['a', 'b', 'c']
-          >>> regex.findall(r"(?r).", "abc")
-          ['c', 'b', 'a']
-        
-        Note: the result of a reverse search is not necessarily the reverse of a forward search:
-        
-        .. sourcecode:: python
-        
-          >>> regex.findall(r"..", "abcde")
-          ['ab', 'cd']
-          >>> regex.findall(r"(?r)..", "abcde")
-          ['de', 'bc']
-        
-        Matching a single grapheme
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``\X``
-        
-        The grapheme matcher is supported. It now conforms to the Unicode specification at ``http://www.unicode.org/reports/tr29/``.
-        
-        Branch reset
-        ^^^^^^^^^^^^
-        
-        ``(?|...|...)``
-        
-        Capture group numbers will be reused across the alternatives, but groups with different names will have different group numbers.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.match(r"(?|(first)|(second))", "first").groups()
-          ('first',)
-          >>> regex.match(r"(?|(first)|(second))", "second").groups()
-          ('second',)
-        
-        Note that there is only one group.
-        
-        Default Unicode word boundary
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The ``WORD`` flag changes the definition of a 'word boundary' to that of a default Unicode word boundary. This applies to ``\b`` and ``\B``.
-        
-        Timeout (Python 3)
-        ^^^^^^^^^^^^^^^^^^
-        
-        The matching methods and functions support timeouts. The timeout (in seconds) applies to the entire operation:
-        
-        .. sourcecode:: python
-        
-          >>> from time import sleep
-          >>>
-          >>> def fast_replace(m):
-          ...     return 'X'
-          ...
-          >>> def slow_replace(m):
-          ...     sleep(0.5)
-          ...     return 'X'
-          ...
-          >>> regex.sub(r'[a-z]', fast_replace, 'abcde', timeout=2)
-          'XXXXX'
-          >>> regex.sub(r'[a-z]', slow_replace, 'abcde', timeout=2)
-          Traceback (most recent call last):
-            File "<stdin>", line 1, in <module>
-            File "C:\Python37\lib\site-packages\regex\regex.py", line 276, in sub
-              endpos, concurrent, timeout)
-          TimeoutError: regex timed out
-        
-Platform: UNKNOWN
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3.6
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Topic :: Scientific/Engineering :: Information Analysis
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Classifier: Topic :: Text Processing
-Classifier: Topic :: Text Processing :: General
+Metadata-Version: 2.1
+Name: regex
+Version: 2021.11.10
+Summary: Alternative regular expression module, to replace re.
+Home-page: https://github.com/mrabarnett/mrab-regex
+Author: Matthew Barnett
+Author-email: regex@mrabarnett.plus.com
+License: Apache Software License
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Text Processing
+Classifier: Topic :: Text Processing :: General
+Description-Content-Type: text/x-rst
+License-File: LICENSE.txt
+
+Introduction
+------------
+
+This regex implementation is backwards-compatible with the standard 're' module, but offers additional functionality.
+
+Note
+----
+
+The re module's behaviour with zero-width matches changed in Python 3.7, and this module will follow that behaviour when compiled for Python 3.7.
+
+PyPy
+----
+
+This module is targeted at CPython. It expects that all codepoints are the same width, so it won't behave properly with PyPy outside U+0000..U+007F because PyPy stores strings as UTF-8.
+
+Old vs new behaviour
+--------------------
+
+In order to be compatible with the re module, this module has 2 behaviours:
+
+* **Version 0** behaviour (old behaviour, compatible with the re module):
+
+  Please note that the re module's behaviour may change over time, and I'll endeavour to match that behaviour in version 0.
+
+  * Indicated by the ``VERSION0`` or ``V0`` flag, or ``(?V0)`` in the pattern.
+
+  * Zero-width matches are not handled correctly in the re module before Python 3.7. The behaviour in those earlier versions is:
+
+    * ``.split`` won't split a string at a zero-width match.
+
+    * ``.sub`` will advance by one character after a zero-width match.
+
+  * Inline flags apply to the entire pattern, and they can't be turned off.
+
+  * Only simple sets are supported.
+
+  * Case-insensitive matches in Unicode use simple case-folding by default.
+
+* **Version 1** behaviour (new behaviour, possibly different from the re module):
+
+  * Indicated by the ``VERSION1`` or ``V1`` flag, or ``(?V1)`` in the pattern.
+
+  * Zero-width matches are handled correctly.
+
+  * Inline flags apply to the end of the group or pattern, and they can be turned off.
+
+  * Nested sets and set operations are supported.
+
+  * Case-insensitive matches in Unicode use full case-folding by default.
+
+If no version is specified, the regex module will default to ``regex.DEFAULT_VERSION``.
+
+Case-insensitive matches in Unicode
+-----------------------------------
+
+The regex module supports both simple and full case-folding for case-insensitive matches in Unicode. Use of full case-folding can be turned on using the ``FULLCASE`` or ``F`` flag, or ``(?f)`` in the pattern. Please note that this flag affects how the ``IGNORECASE`` flag works; the ``FULLCASE`` flag itself does not turn on case-insensitive matching.
+
+In the version 0 behaviour, the flag is off by default.
+
+In the version 1 behaviour, the flag is on by default.
+
+Nested sets and set operations
+------------------------------
+
+It's not possible to support both simple sets, as used in the re module, and nested sets at the same time because of a difference in the meaning of an unescaped ``"["`` in a set.
+
+For example, the pattern ``[[a-z]--[aeiou]]`` is treated in the version 0 behaviour (simple sets, compatible with the re module) as:
+
+* Set containing "[" and the letters "a" to "z"
+
+* Literal "--"
+
+* Set containing letters "a", "e", "i", "o", "u"
+
+* Literal "]"
+
+but in the version 1 behaviour (nested sets, enhanced behaviour) as:
+
+* Set which is:
+
+  * Set containing the letters "a" to "z"
+
+* but excluding:
+
+  * Set containing the letters "a", "e", "i", "o", "u"
+
+Version 0 behaviour: only simple sets are supported.
+
+Version 1 behaviour: nested sets and set operations are supported.
+
+Flags
+-----
+
+There are 2 kinds of flag: scoped and global. Scoped flags can apply to only part of a pattern and can be turned on or off; global flags apply to the entire pattern and can only be turned on.
+
+The scoped flags are: ``FULLCASE``, ``IGNORECASE``, ``MULTILINE``, ``DOTALL``, ``VERBOSE``, ``WORD``.
+
+The global flags are: ``ASCII``, ``BESTMATCH``, ``ENHANCEMATCH``, ``LOCALE``, ``POSIX``, ``REVERSE``, ``UNICODE``, ``VERSION0``, ``VERSION1``.
+
+If neither the ``ASCII``, ``LOCALE`` nor ``UNICODE`` flag is specified, it will default to ``UNICODE`` if the regex pattern is a Unicode string and ``ASCII`` if it's a bytestring.
+
+The ``ENHANCEMATCH`` flag makes fuzzy matching attempt to improve the fit of the next match that it finds.
+
+The ``BESTMATCH`` flag makes fuzzy matching search for the best match instead of the next match.
+
+Notes on named capture groups
+-----------------------------
+
+All capture groups have a group number, starting from 1.
+
+Groups with the same group name will have the same group number, and groups with a different group name will have a different group number.
+
+The same name can be used by more than one group, with later captures 'overwriting' earlier captures. All of the captures of the group will be available from the ``captures`` method of the match object.
+
+Group numbers will be reused across different branches of a branch reset, eg. ``(?|(first)|(second))`` has only group 1. If capture groups have different group names then they will, of course, have different group numbers, eg. ``(?|(?P<foo>first)|(?P<bar>second))`` has group 1 ("foo") and group 2 ("bar").
+
+In the regex ``(\s+)(?|(?P<foo>[A-Z]+)|(\w+) (?P<foo>[0-9]+))`` there are 2 groups:
+
+* ``(\s+)`` is group 1.
+
+* ``(?P<foo>[A-Z]+)`` is group 2, also called "foo".
+
+* ``(\w+)`` is group 2 because of the branch reset.
+
+* ``(?P<foo>[0-9]+)`` is group 2 because it's called "foo".
+
+If you want to prevent ``(\w+)`` from being group 2, you need to name it (different name, different group number).
+
+Multithreading
+--------------
+
+The regex module releases the GIL during matching on instances of the built-in (immutable) string classes, enabling other Python threads to run concurrently. It is also possible to force the regex module to release the GIL during matching by calling the matching methods with the keyword argument ``concurrent=True``. The behaviour is undefined if the string changes during matching, so use it *only* when it is guaranteed that that won't happen.
+
+Unicode
+-------
+
+This module supports Unicode 14.0.0.
+
+Full Unicode case-folding is supported.
+
+Additional features
+-------------------
+
+The issue numbers relate to the Python bug tracker, except where listed as "Hg issue".
+
+Added support for lookaround in conditional pattern (`Hg issue 163 <https://github.com/mrabarnett/mrab-regex/issues/163>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The test of a conditional pattern can now be a lookaround.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.match(r'(?(?=\d)\d+|\w+)', '123abc')
+  <regex.Match object; span=(0, 3), match='123'>
+  >>> regex.match(r'(?(?=\d)\d+|\w+)', 'abc123')
+  <regex.Match object; span=(0, 6), match='abc123'>
+
+This is not quite the same as putting a lookaround in the first branch of a pair of alternatives.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> print(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc'))
+  <regex.Match object; span=(0, 6), match='123abc'>
+  >>> print(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'))
+  None
+
+In the first example, the lookaround matched, but the remainder of the first branch failed to match, and so the second branch was attempted, whereas in the second example, the lookaround matched, and the first branch failed to match, but the second branch was **not** attempted.
+
+Added POSIX matching (leftmost longest) (`Hg issue 150 <https://github.com/mrabarnett/mrab-regex/issues/150>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The POSIX standard for regex is to return the leftmost longest match. This can be turned on using the ``POSIX`` flag (``(?p)``).
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> # Normal matching.
+  >>> regex.search(r'Mr|Mrs', 'Mrs')
+  <regex.Match object; span=(0, 2), match='Mr'>
+  >>> regex.search(r'one(self)?(selfsufficient)?', 'oneselfsufficient')
+  <regex.Match object; span=(0, 7), match='oneself'>
+  >>> # POSIX matching.
+  >>> regex.search(r'(?p)Mr|Mrs', 'Mrs')
+  <regex.Match object; span=(0, 3), match='Mrs'>
+  >>> regex.search(r'(?p)one(self)?(selfsufficient)?', 'oneselfsufficient')
+  <regex.Match object; span=(0, 17), match='oneselfsufficient'>
+
+Note that it will take longer to find matches because when it finds a match at a certain position, it won't return that immediately, but will keep looking to see if there's another longer match there.
+
+Added ``(?(DEFINE)...)`` (`Hg issue 152 <https://github.com/mrabarnett/mrab-regex/issues/152>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If there's no group called "DEFINE", then ... will be ignored, but any group definitions within it will be available.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.search(r'(?(DEFINE)(?P<quant>\d+)(?P<item>\w+))(?&quant) (?&item)', '5 elephants')
+  <regex.Match object; span=(0, 11), match='5 elephants'>
+
+Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://github.com/mrabarnett/mrab-regex/issues/153>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``(*PRUNE)`` discards the backtracking info up to that point. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
+
+``(*SKIP)`` is similar to ``(*PRUNE)``, except that it also sets where in the text the next attempt to match will start. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
+
+``(*FAIL)`` causes immediate backtracking. ``(*F)`` is a permitted abbreviation.
+
+Added ``\K`` (`Hg issue 151 <https://github.com/mrabarnett/mrab-regex/issues/151>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Keeps the part of the entire match after the position where ``\K`` occurred; the part before it is discarded.
+
+It does not affect what capture groups return.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.search(r'(\w\w\K\w\w\w)', 'abcdef')
+  >>> m[0]
+  'cde'
+  >>> m[1]
+  'abcde'
+  >>>
+  >>> m = regex.search(r'(?r)(\w\w\K\w\w\w)', 'abcdef')
+  >>> m[0]
+  'bc'
+  >>> m[1]
+  'bcdef'
+
+Added capture subscripting for ``expandf`` and ``subf``/``subfn`` (`Hg issue 133 <https://github.com/mrabarnett/mrab-regex/issues/133>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can now use subscripting to get the captures of a repeated capture group.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.match(r"(\w)+", "abc")
+  >>> m.expandf("{1}")
+  'c'
+  >>> m.expandf("{1[0]} {1[1]} {1[2]}")
+  'a b c'
+  >>> m.expandf("{1[-1]} {1[-2]} {1[-3]}")
+  'c b a'
+  >>>
+  >>> m = regex.match(r"(?P<letter>\w)+", "abc")
+  >>> m.expandf("{letter}")
+  'c'
+  >>> m.expandf("{letter[0]} {letter[1]} {letter[2]}")
+  'a b c'
+  >>> m.expandf("{letter[-1]} {letter[-2]} {letter[-3]}")
+  'c b a'
+
+Added support for referring to a group by number using ``(?P=...)``.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is in addition to the existing ``\g<...>``.
+
+Fixed the handling of locale-sensitive regexes.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``LOCALE`` flag is intended for legacy code and has limited support. You're still recommended to use Unicode instead.
+
+Added partial matches (`Hg issue 102 <https://github.com/mrabarnett/mrab-regex/issues/102>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A partial match is one that matches up to the end of string, but that string has been truncated and you want to know whether a complete match could be possible if the string had not been truncated.
+
+Partial matches are supported by ``match``, ``search``, ``fullmatch`` and ``finditer`` with the ``partial`` keyword argument.
+
+Match objects have a ``partial`` attribute, which is ``True`` if it's a partial match.
+
+For example, if you wanted a user to enter a 4-digit number and check it character by character as it was being entered:
+
+.. sourcecode:: python
+
+  >>> pattern = regex.compile(r'\d{4}')
+
+  >>> # Initially, nothing has been entered:
+  >>> print(pattern.fullmatch('', partial=True))
+  <regex.Match object; span=(0, 0), match='', partial=True>
+
+  >>> # An empty string is OK, but it's only a partial match.
+  >>> # The user enters a letter:
+  >>> print(pattern.fullmatch('a', partial=True))
+  None
+  >>> # It'll never match.
+
+  >>> # The user deletes that and enters a digit:
+  >>> print(pattern.fullmatch('1', partial=True))
+  <regex.Match object; span=(0, 1), match='1', partial=True>
+  >>> # It matches this far, but it's only a partial match.
+
+  >>> # The user enters 2 more digits:
+  >>> print(pattern.fullmatch('123', partial=True))
+  <regex.Match object; span=(0, 3), match='123', partial=True>
+  >>> # It matches this far, but it's only a partial match.
+
+  >>> # The user enters another digit:
+  >>> print(pattern.fullmatch('1234', partial=True))
+  <regex.Match object; span=(0, 4), match='1234'>
+  >>> # It's a complete match.
+
+  >>> # If the user enters another digit:
+  >>> print(pattern.fullmatch('12345', partial=True))
+  None
+  >>> # It's no longer a match.
+
+  >>> # This is a partial match:
+  >>> pattern.match('123', partial=True).partial
+  True
+
+  >>> # This is a complete match:
+  >>> pattern.match('1233', partial=True).partial
+  False
+
+``*`` operator not working correctly with sub() (`Hg issue 106 <https://github.com/mrabarnett/mrab-regex/issues/106>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes it's not clear how zero-width matches should be handled. For example, should ``.*`` match 0 characters directly after matching >0 characters?
+
+Examples:
+
+.. sourcecode:: python
+
+  # Python 3.7 and later
+  >>> regex.sub('.*', 'x', 'test')
+  'xx'
+  >>> regex.sub('.*?', '|', 'test')
+  '|||||||||'
+
+  # Python 3.6 and earlier
+  >>> regex.sub('(?V0).*', 'x', 'test')
+  'x'
+  >>> regex.sub('(?V1).*', 'x', 'test')
+  'xx'
+  >>> regex.sub('(?V0).*?', '|', 'test')
+  '|t|e|s|t|'
+  >>> regex.sub('(?V1).*?', '|', 'test')
+  '|||||||||'
+
+Added ``capturesdict`` (`Hg issue 86 <https://github.com/mrabarnett/mrab-regex/issues/86>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``capturesdict`` is a combination of ``groupdict`` and ``captures``:
+
+``groupdict`` returns a dict of the named groups and the last capture of those groups.
+
+``captures`` returns a list of all the captures of a group.
+
+``capturesdict`` returns a dict of the named groups and lists of all the captures of those groups.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.match(r"(?:(?P<word>\w+) (?P<digits>\d+)\n)+", "one 1\ntwo 2\nthree 3\n")
+  >>> m.groupdict()
+  {'word': 'three', 'digits': '3'}
+  >>> m.captures("word")
+  ['one', 'two', 'three']
+  >>> m.captures("digits")
+  ['1', '2', '3']
+  >>> m.capturesdict()
+  {'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}
+
+Allow duplicate names of groups (`Hg issue 87 <https://github.com/mrabarnett/mrab-regex/issues/87>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Group names can now be duplicated.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> # With optional groups:
+  >>>
+  >>> # Both groups capture, the second capture 'overwriting' the first.
+  >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or second")
+  >>> m.group("item")
+  'second'
+  >>> m.captures("item")
+  ['first', 'second']
+  >>> # Only the second group captures.
+  >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", " or second")
+  >>> m.group("item")
+  'second'
+  >>> m.captures("item")
+  ['second']
+  >>> # Only the first group captures.
+  >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or ")
+  >>> m.group("item")
+  'first'
+  >>> m.captures("item")
+  ['first']
+  >>>
+  >>> # With mandatory groups:
+  >>>
+  >>> # Both groups capture, the second capture 'overwriting' the first.
+  >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)?", "first or second")
+  >>> m.group("item")
+  'second'
+  >>> m.captures("item")
+  ['first', 'second']
+  >>> # Again, both groups capture, the second capture 'overwriting' the first.
+  >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", " or second")
+  >>> m.group("item")
+  'second'
+  >>> m.captures("item")
+  ['', 'second']
+  >>> # And yet again, both groups capture, the second capture 'overwriting' the first.
+  >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or ")
+  >>> m.group("item")
+  ''
+  >>> m.captures("item")
+  ['first', '']
+
+Added ``fullmatch`` (`issue #16203 <https://bugs.python.org/issue16203>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``fullmatch`` behaves like ``match``, except that it must match all of the string.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> print(regex.fullmatch(r"abc", "abc").span())
+  (0, 3)
+  >>> print(regex.fullmatch(r"abc", "abcx"))
+  None
+  >>> print(regex.fullmatch(r"abc", "abcx", endpos=3).span())
+  (0, 3)
+  >>> print(regex.fullmatch(r"abc", "xabcy", pos=1, endpos=4).span())
+  (1, 4)
+  >>>
+  >>> regex.match(r"a.*?", "abcd").group(0)
+  'a'
+  >>> regex.fullmatch(r"a.*?", "abcd").group(0)
+  'abcd'
+
+Added ``subf`` and ``subfn``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``subf`` and ``subfn`` are alternatives to ``sub`` and ``subn`` respectively. When passed a replacement string, they treat it as a format string.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", "foo bar")
+  'foo bar => bar foo'
+  >>> regex.subf(r"(?P<word1>\w+) (?P<word2>\w+)", "{word2} {word1}", "foo bar")
+  'bar foo'
+
+Added ``expandf`` to match object
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``expandf`` is an alternative to ``expand``. When passed a replacement string, it treats it as a format string.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.match(r"(\w+) (\w+)", "foo bar")
+  >>> m.expandf("{0} => {2} {1}")
+  'foo bar => bar foo'
+  >>>
+  >>> m = regex.match(r"(?P<word1>\w+) (?P<word2>\w+)", "foo bar")
+  >>> m.expandf("{word2} {word1}")
+  'bar foo'
+
+Detach searched string
+^^^^^^^^^^^^^^^^^^^^^^
+
+A match object contains a reference to the string that was searched, via its ``string`` attribute. The ``detach_string`` method will 'detach' that string, making it available for garbage collection, which might save valuable memory if that string is very large.
+
+Example:
+
+.. sourcecode:: python
+
+  >>> m = regex.search(r"\w+", "Hello world")
+  >>> print(m.group())
+  Hello
+  >>> print(m.string)
+  Hello world
+  >>> m.detach_string()
+  >>> print(m.group())
+  Hello
+  >>> print(m.string)
+  None
+
+Recursive patterns (`Hg issue 27 <https://github.com/mrabarnett/mrab-regex/issues/27>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Recursive and repeated patterns are supported.
+
+``(?R)`` or ``(?0)`` tries to match the entire regex recursively. ``(?1)``, ``(?2)``, etc, try to match the relevant capture group.
+
+``(?&name)`` tries to match the named capture group.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.match(r"(Tarzan|Jane) loves (?1)", "Tarzan loves Jane").groups()
+  ('Tarzan',)
+  >>> regex.match(r"(Tarzan|Jane) loves (?1)", "Jane loves Tarzan").groups()
+  ('Jane',)
+
+  >>> m = regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")
+  >>> m.group(0, 1, 2)
+  ('kayak', 'k', None)
+
+The first two examples show how the subpattern within the capture group is reused, but is *not* itself a capture group. In other words, ``"(Tarzan|Jane) loves (?1)"`` is equivalent to ``"(Tarzan|Jane) loves (?:Tarzan|Jane)"``.
+
+It's possible to backtrack into a recursed or repeated group.
+
+You can't call a group if there is more than one group with that group name or group number (``"ambiguous group reference"``).
+
+The alternative forms ``(?P>name)`` and ``(?P&name)`` are also supported.
+
+Full Unicode case-folding is supported.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In version 1 behaviour, the regex module uses full case-folding when performing case-insensitive matches in Unicode.
+
+Examples (in Python 3):
+
+.. sourcecode:: python
+
+  >>> regex.match(r"(?iV1)strasse", "stra\N{LATIN SMALL LETTER SHARP S}e").span()
+  (0, 6)
+  >>> regex.match(r"(?iV1)stra\N{LATIN SMALL LETTER SHARP S}e", "STRASSE").span()
+  (0, 7)
+
+In version 0 behaviour, it uses simple case-folding for backward compatibility with the re module.
+
+Approximate "fuzzy" matching (`Hg issue 12 <https://github.com/mrabarnett/mrab-regex/issues/12>`_, `Hg issue 41 <https://github.com/mrabarnett/mrab-regex/issues/41>`_, `Hg issue 109 <https://github.com/mrabarnett/mrab-regex/issues/109>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Regex usually attempts an exact match, but sometimes an approximate, or "fuzzy", match is needed, for those cases where the text being searched may contain errors in the form of inserted, deleted or substituted characters.
+
+A fuzzy regex specifies which types of errors are permitted, and, optionally, either the minimum and maximum or only the maximum permitted number of each type. (You cannot specify only a minimum.)
+
+The 3 types of error are:
+
+* Insertion, indicated by "i"
+
+* Deletion, indicated by "d"
+
+* Substitution, indicated by "s"
+
+In addition, "e" indicates any type of error.
+
+The fuzziness of a regex item is specified between "{" and "}" after the item.
+
+Examples:
+
+* ``foo`` match "foo" exactly
+
+* ``(?:foo){i}`` match "foo", permitting insertions
+
+* ``(?:foo){d}`` match "foo", permitting deletions
+
+* ``(?:foo){s}`` match "foo", permitting substitutions
+
+* ``(?:foo){i,s}`` match "foo", permitting insertions and substitutions
+
+* ``(?:foo){e}`` match "foo", permitting errors
+
+If a certain type of error is specified, then any type not specified will **not** be permitted.
+
+In the following examples I'll omit the item and write only the fuzziness:
+
+* ``{d<=3}`` permit at most 3 deletions, but no other types
+
+* ``{i<=1,s<=2}`` permit at most 1 insertion and at most 2 substitutions, but no deletions
+
+* ``{1<=e<=3}`` permit at least 1 and at most 3 errors
+
+* ``{i<=2,d<=2,e<=3}`` permit at most 2 insertions, at most 2 deletions, at most 3 errors in total, but no substitutions
+
+It's also possible to state the costs of each type of error and the maximum permitted total cost.
+
+Examples:
+
+* ``{2i+2d+1s<=4}`` each insertion costs 2, each deletion costs 2, each substitution costs 1, the total cost must not exceed 4
+
+* ``{i<=1,d<=1,s<=1,2i+2d+1s<=4}`` at most 1 insertion, at most 1 deletion, at most 1 substitution; each insertion costs 2, each deletion costs 2, each substitution costs 1, the total cost must not exceed 4
+
+You can also use "<" instead of "<=" if you want an exclusive minimum or maximum.
+
+You can add a test to perform on a character that's substituted or inserted.
+
+Examples:
+
+* ``{s<=2:[a-z]}`` at most 2 substitutions, which must be in the character set ``[a-z]``.
+
+* ``{s<=2,i<=3:\d}`` at most 2 substitutions, at most 3 insertions, which must be digits.
+
+By default, fuzzy matching searches for the first match that meets the given constraints. The ``ENHANCEMATCH`` flag will cause it to attempt to improve the fit (i.e. reduce the number of errors) of the match that it has found.
+
+The ``BESTMATCH`` flag will make it search for the best match instead.
+
+Further examples to note:
+
+* ``regex.search("(dog){e}", "cat and dog")[1]`` returns ``"cat"`` because that matches ``"dog"`` with 3 errors (an unlimited number of errors is permitted).
+
+* ``regex.search("(dog){e<=1}", "cat and dog")[1]`` returns ``" dog"`` (with a leading space) because that matches ``"dog"`` with 1 error, which is within the limit.
+
+* ``regex.search("(?e)(dog){e<=1}", "cat and dog")[1]`` returns ``"dog"`` (without a leading space) because the fuzzy search matches ``" dog"`` with 1 error, which is within the limit, and then the ``(?e)`` makes it attempt a better fit.
+
+In the first two examples there are perfect matches later in the string, but in neither case is it the first possible match.
+
+The match object has an attribute ``fuzzy_counts`` which gives the total number of substitutions, insertions and deletions.
+
+.. sourcecode:: python
+
+  >>> # A 'raw' fuzzy match:
+  >>> regex.fullmatch(r"(?:cats|cat){e<=1}", "cat").fuzzy_counts
+  (0, 0, 1)
+  >>> # 0 substitutions, 0 insertions, 1 deletion.
+
+  >>> # A better match might be possible if the ENHANCEMATCH flag is used:
+  >>> regex.fullmatch(r"(?e)(?:cats|cat){e<=1}", "cat").fuzzy_counts
+  (0, 0, 0)
+  >>> # 0 substitutions, 0 insertions, 0 deletions.
+
+The match object also has an attribute ``fuzzy_changes`` which gives a tuple of the positions of the substitutions, insertions and deletions.
+
+.. sourcecode:: python
+
+  >>> m = regex.search('(fuu){i<=2,d<=2,e<=5}', 'anaconda foo bar')
+  >>> m
+  <regex.Match object; span=(7, 10), match='a f', fuzzy_counts=(0, 2, 2)>
+  >>> m.fuzzy_changes
+  ([], [7, 8], [10, 11])
+
+What this means is that if the matched part of the string had been:
+
+.. sourcecode:: python
+
+  'anacondfuuoo bar'
+
+it would've been an exact match.
+
+However, there were insertions at positions 7 and 8:
+
+.. sourcecode:: python
+
+  'anaconda fuuoo bar'
+          ^^
+
+and deletions at positions 10 and 11:
+
+.. sourcecode:: python
+
+  'anaconda f~~oo bar'
+             ^^
+
+So the actual string was:
+
+.. sourcecode:: python
+
+  'anaconda foo bar'
+
+Named lists (`Hg issue 11 <https://github.com/mrabarnett/mrab-regex/issues/11>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``\L<name>``
+
+There are occasions where you may want to include a list (actually, a set) of options in a regex.
+
+One way is to build the pattern like this:
+
+.. sourcecode:: python
+
+  >>> p = regex.compile(r"first|second|third|fourth|fifth")
+
+but if the list is large, parsing the resulting regex can take considerable time, and care must also be taken that the strings are properly escaped and properly ordered, for example, "cats" before "cat".
+
+The new alternative is to use a named list:
+
+.. sourcecode:: python
+
+  >>> option_set = ["first", "second", "third", "fourth", "fifth"]
+  >>> p = regex.compile(r"\L<options>", options=option_set)
+
+The order of the items is irrelevant, they are treated as a set. The named lists are available as the ``.named_lists`` attribute of the pattern object:
+
+.. sourcecode:: python
+
+  >>> print(p.named_lists)
+  # Python 3
+  {'options': frozenset({'fifth', 'first', 'fourth', 'second', 'third'})}
+  # Python 2
+  {'options': frozenset(['fifth', 'fourth', 'second', 'third', 'first'])}
+
+If there are any unused keyword arguments, ``ValueError`` will be raised unless you tell it otherwise:
+
+.. sourcecode:: python
+
+  >>> option_set = ["first", "second", "third", "fourth", "fifth"]
+  >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[])
+  Traceback (most recent call last):
+    File "<stdin>", line 1, in <module>
+    File "C:\Python37\lib\site-packages\regex\regex.py", line 348, in compile
+      return _compile(pattern, flags, ignore_unused, kwargs)
+    File "C:\Python37\lib\site-packages\regex\regex.py", line 585, in _compile
+      raise ValueError('unused keyword argument {!a}'.format(any_one))
+  ValueError: unused keyword argument 'other_options'
+  >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=True)
+  >>>
+
+Start and end of word
+^^^^^^^^^^^^^^^^^^^^^
+
+``\m`` matches at the start of a word.
+
+``\M`` matches at the end of a word.
+
+Compare with ``\b``, which matches at the start or end of a word.
+
+Unicode line separators
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Normally the only line separator is ``\n`` (``\x0A``), but if the ``WORD`` flag is turned on then the line separators are ``\x0D\x0A``, ``\x0A``, ``\x0B``, ``\x0C`` and ``\x0D``, plus ``\x85``, ``\u2028`` and ``\u2029`` when working with Unicode.
+
+This affects the regex dot ``"."``, which, with the ``DOTALL`` flag turned off, matches any character except a line separator. It also affects the line anchors ``^`` and ``$`` (in multiline mode).
+
+Set operators
+^^^^^^^^^^^^^
+
+**Version 1 behaviour only**
+
+Set operators have been added, and a set ``[...]`` can include nested sets.
+
+The operators, in order of increasing precedence, are:
+
+* ``||`` for union ("x||y" means "x or y")
+
+* ``~~`` (double tilde) for symmetric difference ("x~~y" means "x or y, but not both")
+
+* ``&&`` for intersection ("x&&y" means "x and y")
+
+* ``--`` (double dash) for difference ("x--y" means "x but not y")
+
+Implicit union, ie, simple juxtaposition like in ``[ab]``, has the highest precedence. Thus, ``[ab&&cd]`` is the same as ``[[a||b]&&[c||d]]``.
+
+Examples:
+
+* ``[ab]`` # Set containing 'a' and 'b'
+
+* ``[a-z]`` # Set containing 'a' .. 'z'
+
+* ``[[a-z]--[qw]]`` # Set containing 'a' .. 'z', but not 'q' or 'w'
+
+* ``[a-z--qw]`` # Same as above
+
+* ``[\p{L}--QW]`` # Set containing all letters except 'Q' and 'W'
+
+* ``[\p{N}--[0-9]]`` # Set containing all numbers except '0' .. '9'
+
+* ``[\p{ASCII}&&\p{Letter}]`` # Set containing all characters that are both ASCII and letters
+
+regex.escape (`issue #2650 <https://bugs.python.org/issue2650>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+regex.escape has an additional keyword parameter ``special_only``. When True, only 'special' regex characters, such as '?', are escaped.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.escape("foo!?", special_only=False)
+  'foo\\!\\?'
+  >>> regex.escape("foo!?", special_only=True)
+  'foo!\\?'
+
+regex.escape (`Hg issue 249 <https://github.com/mrabarnett/mrab-regex/issues/249>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+regex.escape has an additional keyword parameter ``literal_spaces``. When True, spaces are not escaped.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.escape("foo bar!?", literal_spaces=False)
+  'foo\\ bar!\\?'
+  >>> regex.escape("foo bar!?", literal_spaces=True)
+  'foo bar!\\?'
+
+Repeated captures (`issue #7132 <https://bugs.python.org/issue7132>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A match object has additional methods which return information on all the successful matches of a repeated capture group. These methods are:
+
+* ``matchobject.captures([group1, ...])``
+
+  * Returns a list of the strings matched in a group or groups. Compare with ``matchobject.group([group1, ...])``.
+
+* ``matchobject.starts([group])``
+
+  * Returns a list of the start positions. Compare with ``matchobject.start([group])``.
+
+* ``matchobject.ends([group])``
+
+  * Returns a list of the end positions. Compare with ``matchobject.end([group])``.
+
+* ``matchobject.spans([group])``
+
+  * Returns a list of the spans. Compare with ``matchobject.span([group])``.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.search(r"(\w{3})+", "123456789")
+  >>> m.group(1)
+  '789'
+  >>> m.captures(1)
+  ['123', '456', '789']
+  >>> m.start(1)
+  6
+  >>> m.starts(1)
+  [0, 3, 6]
+  >>> m.end(1)
+  9
+  >>> m.ends(1)
+  [3, 6, 9]
+  >>> m.span(1)
+  (6, 9)
+  >>> m.spans(1)
+  [(0, 3), (3, 6), (6, 9)]
+
+Atomic grouping (`issue #433030 <https://bugs.python.org/issue433030>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``(?>...)``
+
+If the following pattern subsequently fails, then the subpattern as a whole will fail.
+
+Possessive quantifiers.
+^^^^^^^^^^^^^^^^^^^^^^^
+
+``(?:...)?+`` ; ``(?:...)*+`` ; ``(?:...)++`` ; ``(?:...){min,max}+``
+
+The subpattern is matched up to 'max' times. If the following pattern subsequently fails, then all of the repeated subpatterns will fail as a whole. For example, ``(?:...)++`` is equivalent to ``(?>(?:...)+)``.
+
+Scoped flags (`issue #433028 <https://bugs.python.org/issue433028>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``(?flags-flags:...)``
+
+The flags will apply only to the subpattern. Flags can be turned on or off.
+
+Definition of 'word' character (`issue #1693050 <https://bugs.python.org/issue1693050>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The definition of a 'word' character has been expanded for Unicode. It now conforms to the Unicode specification at ``http://www.unicode.org/reports/tr29/``.
+
+Variable-length lookbehind
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A lookbehind can match a variable-length string.
+
+Flags argument for regex.split, regex.sub and regex.subn (`issue #3482 <https://bugs.python.org/issue3482>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``regex.split``, ``regex.sub`` and ``regex.subn`` support a 'flags' argument.
+
+Pos and endpos arguments for regex.sub and regex.subn
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``regex.sub`` and ``regex.subn`` support 'pos' and 'endpos' arguments.
+
+'Overlapped' argument for regex.findall and regex.finditer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``regex.findall`` and ``regex.finditer`` support an 'overlapped' flag which permits overlapped matches.
+
+Splititer
+^^^^^^^^^
+
+``regex.splititer`` has been added. It's a generator equivalent of ``regex.split``.
+
+Subscripting for groups
+^^^^^^^^^^^^^^^^^^^^^^^
+
+A match object accepts access to the captured groups via subscripting and slicing:
+
+.. sourcecode:: python
+
+  >>> m = regex.search(r"(?P<before>.*?)(?P<num>\d+)(?P<after>.*)", "pqr123stu")
+  >>> print(m["before"])
+  pqr
+  >>> print(len(m))
+  4
+  >>> print(m[:])
+  ('pqr123stu', 'pqr', '123', 'stu')
+
+Named groups
+^^^^^^^^^^^^
+
+Groups can be named with ``(?<name>...)`` as well as the current ``(?P<name>...)``.
+
+Group references
+^^^^^^^^^^^^^^^^
+
+Groups can be referenced within a pattern with ``\g<name>``. This also allows there to be more than 99 groups.
+
+Named characters
+^^^^^^^^^^^^^^^^
+
+``\N{name}``
+
+Named characters are supported. (Note: only those known by Python's Unicode database are supported.)
+
+Unicode codepoint properties, including scripts and blocks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``\p{property=value}``; ``\P{property=value}``; ``\p{value}`` ; ``\P{value}``
+
+Many Unicode properties are supported, including blocks and scripts. ``\p{property=value}`` or ``\p{property:value}`` matches a character whose property ``property`` has value ``value``. The inverse of ``\p{property=value}`` is ``\P{property=value}`` or ``\p{^property=value}``.
+
+If the short form ``\p{value}`` is used, the properties are checked in the order: ``General_Category``, ``Script``, ``Block``, binary property:
+
+* ``Latin``, the 'Latin' script (``Script=Latin``).
+
+* ``BasicLatin``, the 'BasicLatin' block (``Block=BasicLatin``).
+
+* ``Alphabetic``, the 'Alphabetic' binary property (``Alphabetic=Yes``).
+
+A short form starting with ``Is`` indicates a script or binary property:
+
+* ``IsLatin``, the 'Latin' script (``Script=Latin``).
+
+* ``IsAlphabetic``, the 'Alphabetic' binary property (``Alphabetic=Yes``).
+
+A short form starting with ``In`` indicates a block property:
+
+* ``InBasicLatin``, the 'BasicLatin' block (``Block=BasicLatin``).
+
+POSIX character classes
+^^^^^^^^^^^^^^^^^^^^^^^
+
+``[[:alpha:]]``; ``[[:^alpha:]]``
+
+POSIX character classes are supported. These are normally treated as an alternative form of ``\p{...}``.
+
+The exceptions are ``alnum``, ``digit``, ``punct`` and ``xdigit``, whose definitions are different from those of Unicode.
+
+``[[:alnum:]]`` is equivalent to ``\p{posix_alnum}``.
+
+``[[:digit:]]`` is equivalent to ``\p{posix_digit}``.
+
+``[[:punct:]]`` is equivalent to ``\p{posix_punct}``.
+
+``[[:xdigit:]]`` is equivalent to ``\p{posix_xdigit}``.
+
+Search anchor
+^^^^^^^^^^^^^
+
+``\G``
+
+A search anchor has been added. It matches at the position where each search started/continued and can be used for contiguous matches or in negative variable-length lookbehinds to limit how far back the lookbehind goes:
+
+.. sourcecode:: python
+
+  >>> regex.findall(r"\w{2}", "abcd ef")
+  ['ab', 'cd', 'ef']
+  >>> regex.findall(r"\G\w{2}", "abcd ef")
+  ['ab', 'cd']
+
+* The search starts at position 0 and matches 2 letters 'ab'.
+
+* The search continues at position 2 and matches 2 letters 'cd'.
+
+* The search continues at position 4 and fails to match any letters.
+
+* The anchor stops the search start position from being advanced, so there are no more results.
+
+Reverse searching
+^^^^^^^^^^^^^^^^^
+
+Searches can now work backwards:
+
+.. sourcecode:: python
+
+  >>> regex.findall(r".", "abc")
+  ['a', 'b', 'c']
+  >>> regex.findall(r"(?r).", "abc")
+  ['c', 'b', 'a']
+
+Note: the result of a reverse search is not necessarily the reverse of a forward search:
+
+.. sourcecode:: python
+
+  >>> regex.findall(r"..", "abcde")
+  ['ab', 'cd']
+  >>> regex.findall(r"(?r)..", "abcde")
+  ['de', 'bc']
+
+Matching a single grapheme
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``\X``
+
+The grapheme matcher is supported. It now conforms to the Unicode specification at ``http://www.unicode.org/reports/tr29/``.
+
+Branch reset
+^^^^^^^^^^^^
+
+``(?|...|...)``
+
+Capture group numbers will be reused across the alternatives, but groups with different names will have different group numbers.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.match(r"(?|(first)|(second))", "first").groups()
+  ('first',)
+  >>> regex.match(r"(?|(first)|(second))", "second").groups()
+  ('second',)
+
+Note that there is only one group.
+
+Default Unicode word boundary
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``WORD`` flag changes the definition of a 'word boundary' to that of a default Unicode word boundary. This applies to ``\b`` and ``\B``.
+
+Timeout (Python 3)
+^^^^^^^^^^^^^^^^^^
+
+The matching methods and functions support timeouts. The timeout (in seconds) applies to the entire operation:
+
+.. sourcecode:: python
+
+  >>> from time import sleep
+  >>>
+  >>> def fast_replace(m):
+  ...     return 'X'
+  ...
+  >>> def slow_replace(m):
+  ...     sleep(0.5)
+  ...     return 'X'
+  ...
+  >>> regex.sub(r'[a-z]', fast_replace, 'abcde', timeout=2)
+  'XXXXX'
+  >>> regex.sub(r'[a-z]', slow_replace, 'abcde', timeout=2)
+  Traceback (most recent call last):
+    File "<stdin>", line 1, in <module>
+    File "C:\Python37\lib\site-packages\regex\regex.py", line 276, in sub
+      endpos, concurrent, timeout)
+  TimeoutError: regex timed out
+
+
diff --git a/README.rst b/README.rst
index 304f2b116b05b7da1509b8c63f67fcf7cd21c63b..13f7709da88f8bb3cc4cbb1aeb45e70ba8d4d1de 100644
--- a/README.rst
+++ b/README.rst
@@ -143,8 +143,8 @@ Additional features
 
 The issue numbers relate to the Python bug tracker, except where listed as "Hg issue".
 
-Added support for lookaround in conditional pattern (`Hg issue 163 <https://bitbucket.org/mrabarnett/mrab-regex/issues/163>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added support for lookaround in conditional pattern (`Hg issue 163 <https://github.com/mrabarnett/mrab-regex/issues/163>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The test of a conditional pattern can now be a lookaround.
 
@@ -170,8 +170,8 @@ Examples:
 
 In the first example, the lookaround matched, but the remainder of the first branch failed to match, and so the second branch was attempted, whereas in the second example, the lookaround matched, and the first branch failed to match, but the second branch was **not** attempted.
 
-Added POSIX matching (leftmost longest) (`Hg issue 150 <https://bitbucket.org/mrabarnett/mrab-regex/issues/150>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added POSIX matching (leftmost longest) (`Hg issue 150 <https://github.com/mrabarnett/mrab-regex/issues/150>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The POSIX standard for regex is to return the leftmost longest match. This can be turned on using the ``POSIX`` flag (``(?p)``).
 
@@ -192,8 +192,8 @@ Examples:
 
 Note that it will take longer to find matches because when it finds a match at a certain position, it won't return that immediately, but will keep looking to see if there's another longer match there.
 
-Added ``(?(DEFINE)...)`` (`Hg issue 152 <https://bitbucket.org/mrabarnett/mrab-regex/issues/152>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added ``(?(DEFINE)...)`` (`Hg issue 152 <https://github.com/mrabarnett/mrab-regex/issues/152>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 If there's no group called "DEFINE", then ... will be ignored, but any group definitions within it will be available.
 
@@ -204,8 +204,8 @@ Examples:
   >>> regex.search(r'(?(DEFINE)(?P<quant>\d+)(?P<item>\w+))(?&quant) (?&item)', '5 elephants')
   <regex.Match object; span=(0, 11), match='5 elephants'>
 
-Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://bitbucket.org/mrabarnett/mrab-regex/issues/153>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://github.com/mrabarnett/mrab-regex/issues/153>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ``(*PRUNE)`` discards the backtracking info up to that point. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
 
@@ -213,8 +213,8 @@ Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://bitbucke
 
 ``(*FAIL)`` causes immediate backtracking. ``(*F)`` is a permitted abbreviation.
 
-Added ``\K`` (`Hg issue 151 <https://bitbucket.org/mrabarnett/mrab-regex/issues/151>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added ``\K`` (`Hg issue 151 <https://github.com/mrabarnett/mrab-regex/issues/151>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Keeps the part of the entire match after the position where ``\K`` occurred; the part before it is discarded.
 
@@ -236,8 +236,8 @@ Examples:
   >>> m[1]
   'bcdef'
 
-Added capture subscripting for ``expandf`` and ``subf``/``subfn`` (`Hg issue 133 <https://bitbucket.org/mrabarnett/mrab-regex/issues/133>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added capture subscripting for ``expandf`` and ``subf``/``subfn`` (`Hg issue 133 <https://github.com/mrabarnett/mrab-regex/issues/133>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 You can now use subscripting to get the captures of a repeated capture group.
 
@@ -271,8 +271,8 @@ Fixed the handling of locale-sensitive regexes.
 
 The ``LOCALE`` flag is intended for legacy code and has limited support. You're still recommended to use Unicode instead.
 
-Added partial matches (`Hg issue 102 <https://bitbucket.org/mrabarnett/mrab-regex/issues/102>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added partial matches (`Hg issue 102 <https://github.com/mrabarnett/mrab-regex/issues/102>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 A partial match is one that matches up to the end of string, but that string has been truncated and you want to know whether a complete match could be possible if the string had not been truncated.
 
@@ -324,8 +324,8 @@ For example, if you wanted a user to enter a 4-digit number and check it charact
   >>> pattern.match('1233', partial=True).partial
   False
 
-``*`` operator not working correctly with sub() (`Hg issue 106 <https://bitbucket.org/mrabarnett/mrab-regex/issues/106>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+``*`` operator not working correctly with sub() (`Hg issue 106 <https://github.com/mrabarnett/mrab-regex/issues/106>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Sometimes it's not clear how zero-width matches should be handled. For example, should ``.*`` match 0 characters directly after matching >0 characters?
 
@@ -349,8 +349,8 @@ Examples:
   >>> regex.sub('(?V1).*?', '|', 'test')
   '|||||||||'
 
-Added ``capturesdict`` (`Hg issue 86 <https://bitbucket.org/mrabarnett/mrab-regex/issues/86>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added ``capturesdict`` (`Hg issue 86 <https://github.com/mrabarnett/mrab-regex/issues/86>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ``capturesdict`` is a combination of ``groupdict`` and ``captures``:
 
@@ -374,8 +374,8 @@ Examples:
   >>> m.capturesdict()
   {'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}
 
-Allow duplicate names of groups (`Hg issue 87 <https://bitbucket.org/mrabarnett/mrab-regex/issues/87>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Allow duplicate names of groups (`Hg issue 87 <https://github.com/mrabarnett/mrab-regex/issues/87>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Group names can now be duplicated.
 
@@ -499,8 +499,8 @@ Example:
   >>> print(m.string)
   None
 
-Recursive patterns (`Hg issue 27 <https://bitbucket.org/mrabarnett/mrab-regex/issues/27>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Recursive patterns (`Hg issue 27 <https://github.com/mrabarnett/mrab-regex/issues/27>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Recursive and repeated patterns are supported.
 
@@ -545,8 +545,8 @@ Examples (in Python 3):
 
 In version 0 behaviour, it uses simple case-folding for backward compatibility with the re module.
 
-Approximate "fuzzy" matching (`Hg issue 12 <https://bitbucket.org/mrabarnett/mrab-regex/issues/12>`_, `Hg issue 41 <https://bitbucket.org/mrabarnett/mrab-regex/issues/41>`_, `Hg issue 109 <https://bitbucket.org/mrabarnett/mrab-regex/issues/109>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Approximate "fuzzy" matching (`Hg issue 12 <https://github.com/mrabarnett/mrab-regex/issues/12>`_, `Hg issue 41 <https://github.com/mrabarnett/mrab-regex/issues/41>`_, `Hg issue 109 <https://github.com/mrabarnett/mrab-regex/issues/109>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Regex usually attempts an exact match, but sometimes an approximate, or "fuzzy", match is needed, for those cases where the text being searched may contain errors in the form of inserted, deleted or substituted characters.
 
@@ -674,8 +674,8 @@ So the actual string was:
 
   'anaconda foo bar'
 
-Named lists (`Hg issue 11 <https://bitbucket.org/mrabarnett/mrab-regex/issues/11>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Named lists (`Hg issue 11 <https://github.com/mrabarnett/mrab-regex/issues/11>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ``\L<name>``
 
@@ -787,8 +787,8 @@ Examples:
   >>> regex.escape("foo!?", special_only=True)
   'foo!\\?'
 
-regex.escape (`Hg issue 249 <https://bitbucket.org/mrabarnett/mrab-regex/issues/249>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+regex.escape (`Hg issue 249 <https://github.com/mrabarnett/mrab-regex/issues/249>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 regex.escape has an additional keyword parameter ``literal_spaces``. When True, spaces are not escaped.
 
diff --git a/docs/Features.html b/docs/Features.html
index 1704c4aef8ee42d760db6e4a11318332516b9c93..4c6342b8a71725783594407a44800bfb4b5bd789 100644
--- a/docs/Features.html
+++ b/docs/Features.html
@@ -3,7 +3,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Docutils 0.17: http://docutils.sourceforge.net/" />
+<meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
 <title>Features.rst</title>
 <style type="text/css">
 
@@ -472,7 +472,7 @@ ul.auto-toc {
 <h1>Additional features</h1>
 <p>The issue numbers relate to the Python bug tracker, except where listed as &quot;Hg issue&quot;.</p>
 <div class="section" id="added-support-for-lookaround-in-conditional-pattern-hg-issue-163">
-<h2>Added support for lookaround in conditional pattern (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/163">Hg issue 163</a>)</h2>
+<h2>Added support for lookaround in conditional pattern (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/163">Hg issue 163</a>)</h2>
 <p>The test of a conditional pattern can now be a lookaround.</p>
 <p>Examples:</p>
 <pre class="code python literal-block">
@@ -492,7 +492,7 @@ ul.auto-toc {
 <p>In the first example, the lookaround matched, but the remainder of the first branch failed to match, and so the second branch was attempted, whereas in the second example, the lookaround matched, and the first branch failed to match, but the second branch was <strong>not</strong> attempted.</p>
 </div>
 <div class="section" id="added-posix-matching-leftmost-longest-hg-issue-150">
-<h2>Added POSIX matching (leftmost longest) (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/150">Hg issue 150</a>)</h2>
+<h2>Added POSIX matching (leftmost longest) (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/150">Hg issue 150</a>)</h2>
 <p>The POSIX standard for regex is to return the leftmost longest match. This can be turned on using the <tt class="docutils literal">POSIX</tt> flag (<tt class="docutils literal"><span class="pre">(?p)</span></tt>).</p>
 <p>Examples:</p>
 <pre class="code python literal-block">
@@ -510,7 +510,7 @@ ul.auto-toc {
 <p>Note that it will take longer to find matches because when it finds a match at a certain position, it won't return that immediately, but will keep looking to see if there's another longer match there.</p>
 </div>
 <div class="section" id="added-define-hg-issue-152">
-<h2>Added <tt class="docutils literal"><span class="pre">(?(DEFINE)...)</span></tt> (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/152">Hg issue 152</a>)</h2>
+<h2>Added <tt class="docutils literal"><span class="pre">(?(DEFINE)...)</span></tt> (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/152">Hg issue 152</a>)</h2>
 <p>If there's no group called &quot;DEFINE&quot;, then ... will be ignored, but any group definitions within it will be available.</p>
 <p>Examples:</p>
 <pre class="code python literal-block">
@@ -519,13 +519,13 @@ ul.auto-toc {
 </pre>
 </div>
 <div class="section" id="added-prune-skip-and-fail-hg-issue-153">
-<h2>Added <tt class="docutils literal">(*PRUNE)</tt>, <tt class="docutils literal">(*SKIP)</tt> and <tt class="docutils literal">(*FAIL)</tt> (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/153">Hg issue 153</a>)</h2>
+<h2>Added <tt class="docutils literal">(*PRUNE)</tt>, <tt class="docutils literal">(*SKIP)</tt> and <tt class="docutils literal">(*FAIL)</tt> (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/153">Hg issue 153</a>)</h2>
 <p><tt class="docutils literal">(*PRUNE)</tt> discards the backtracking info up to that point. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.</p>
 <p><tt class="docutils literal">(*SKIP)</tt> is similar to <tt class="docutils literal">(*PRUNE)</tt>, except that it also sets where in the text the next attempt to match will start. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.</p>
 <p><tt class="docutils literal">(*FAIL)</tt> causes immediate backtracking. <tt class="docutils literal">(*F)</tt> is a permitted abbreviation.</p>
 </div>
 <div class="section" id="added-k-hg-issue-151">
-<h2>Added <tt class="docutils literal">\K</tt> (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/151">Hg issue 151</a>)</h2>
+<h2>Added <tt class="docutils literal">\K</tt> (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/151">Hg issue 151</a>)</h2>
 <p>Keeps the part of the entire match after the position where <tt class="docutils literal">\K</tt> occurred; the part before it is discarded.</p>
 <p>It does not affect what capture groups return.</p>
 <p>Examples:</p>
@@ -544,7 +544,7 @@ ul.auto-toc {
 </pre>
 </div>
 <div class="section" id="added-capture-subscripting-for-expandf-and-subf-subfn-hg-issue-133">
-<h2>Added capture subscripting for <tt class="docutils literal">expandf</tt> and <tt class="docutils literal">subf</tt>/<tt class="docutils literal">subfn</tt> (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/133">Hg issue 133</a>)</h2>
+<h2>Added capture subscripting for <tt class="docutils literal">expandf</tt> and <tt class="docutils literal">subf</tt>/<tt class="docutils literal">subfn</tt> (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/133">Hg issue 133</a>)</h2>
 <p>You can now use subscripting to get the captures of a repeated capture group.</p>
 <p>Examples:</p>
 <pre class="code python literal-block">
@@ -574,7 +574,7 @@ ul.auto-toc {
 <p>The <tt class="docutils literal">LOCALE</tt> flag is intended for legacy code and has limited support. You're still recommended to use Unicode instead.</p>
 </div>
 <div class="section" id="added-partial-matches-hg-issue-102">
-<h2>Added partial matches (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/102">Hg issue 102</a>)</h2>
+<h2>Added partial matches (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/102">Hg issue 102</a>)</h2>
 <p>A partial match is one that matches up to the end of string, but that string has been truncated and you want to know whether a complete match could be possible if the string had not been truncated.</p>
 <p>Partial matches are supported by <tt class="docutils literal">match</tt>, <tt class="docutils literal">search</tt>, <tt class="docutils literal">fullmatch</tt> and <tt class="docutils literal">finditer</tt> with the <tt class="docutils literal">partial</tt> keyword argument.</p>
 <p>Match objects have a <tt class="docutils literal">partial</tt> attribute, which is <tt class="docutils literal">True</tt> if it's a partial match.</p>
@@ -622,7 +622,7 @@ ul.auto-toc {
 </pre>
 </div>
 <div class="section" id="operator-not-working-correctly-with-sub-hg-issue-106">
-<h2><tt class="docutils literal">*</tt> operator not working correctly with sub() (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/106">Hg issue 106</a>)</h2>
+<h2><tt class="docutils literal">*</tt> operator not working correctly with sub() (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/106">Hg issue 106</a>)</h2>
 <p>Sometimes it's not clear how zero-width matches should be handled. For example, should <tt class="docutils literal">.*</tt> match 0 characters directly after matching &gt;0 characters?</p>
 <p>Examples:</p>
 <pre class="code python literal-block">
@@ -644,7 +644,7 @@ ul.auto-toc {
 </pre>
 </div>
 <div class="section" id="added-capturesdict-hg-issue-86">
-<h2>Added <tt class="docutils literal">capturesdict</tt> (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/86">Hg issue 86</a>)</h2>
+<h2>Added <tt class="docutils literal">capturesdict</tt> (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/86">Hg issue 86</a>)</h2>
 <p><tt class="docutils literal">capturesdict</tt> is a combination of <tt class="docutils literal">groupdict</tt> and <tt class="docutils literal">captures</tt>:</p>
 <p><tt class="docutils literal">groupdict</tt> returns a dict of the named groups and the last capture of those groups.</p>
 <p><tt class="docutils literal">captures</tt> returns a list of all the captures of a group</p>
@@ -663,7 +663,7 @@ ul.auto-toc {
 </pre>
 </div>
 <div class="section" id="allow-duplicate-names-of-groups-hg-issue-87">
-<h2>Allow duplicate names of groups (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/87">Hg issue 87</a>)</h2>
+<h2>Allow duplicate names of groups (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/87">Hg issue 87</a>)</h2>
 <p>Group names can now be duplicated.</p>
 <p>Examples:</p>
 <pre class="code python literal-block">
@@ -773,7 +773,7 @@ ul.auto-toc {
 </pre>
 </div>
 <div class="section" id="recursive-patterns-hg-issue-27">
-<h2>Recursive patterns (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/27">Hg issue 27</a>)</h2>
+<h2>Recursive patterns (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/27">Hg issue 27</a>)</h2>
 <p>Recursive and repeated patterns are supported.</p>
 <p><tt class="docutils literal"><span class="pre">(?R)</span></tt> or <tt class="docutils literal"><span class="pre">(?0)</span></tt> tries to match the entire regex recursively. <tt class="docutils literal"><span class="pre">(?1)</span></tt>, <tt class="docutils literal"><span class="pre">(?2)</span></tt>, etc, try to match the relevant capture group.</p>
 <p><tt class="docutils literal"><span class="pre">(?&amp;name)</span></tt> tries to match the named capture group.</p>
@@ -806,7 +806,7 @@ ul.auto-toc {
 <p>In version 0 behaviour, it uses simple case-folding for backward compatibility with the re module.</p>
 </div>
 <div class="section" id="approximate-fuzzy-matching-hg-issue-12-hg-issue-41-hg-issue-109">
-<h2>Approximate &quot;fuzzy&quot; matching (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/12">Hg issue 12</a>, <a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/41">Hg issue 41</a>, <a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/109">Hg issue 109</a>)</h2>
+<h2>Approximate &quot;fuzzy&quot; matching (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/12">Hg issue 12</a>, <a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/41">Hg issue 41</a>, <a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/109">Hg issue 109</a>)</h2>
 <p>Regex usually attempts an exact match, but sometimes an approximate, or &quot;fuzzy&quot;, match is needed, for those cases where the text being searched may contain errors in the form of inserted, deleted or substituted characters.</p>
 <p>A fuzzy regex specifies which types of errors are permitted, and, optionally, either the minimum and maximum or only the maximum permitted number of each type. (You cannot specify only a minimum.)</p>
 <p>The 3 types of error are:</p>
@@ -897,7 +897,7 @@ ul.auto-toc {
 </pre>
 </div>
 <div class="section" id="named-lists-hg-issue-11">
-<h2>Named lists (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/11">Hg issue 11</a>)</h2>
+<h2>Named lists (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/11">Hg issue 11</a>)</h2>
 <p><tt class="docutils literal">\L&lt;name&gt;</tt></p>
 <p>There are occasions where you may want to include a list (actually, a set) of options in a regex.</p>
 <p>One way is to build the pattern like this:</p>
@@ -979,7 +979,7 @@ ul.auto-toc {
 </pre>
 </div>
 <div class="section" id="regex-escape-hg-issue-249">
-<h2>regex.escape (<a class="reference external" href="https://bitbucket.org/mrabarnett/mrab-regex/issues/249">Hg issue 249</a>)</h2>
+<h2>regex.escape (<a class="reference external" href="https://github.com/mrabarnett/mrab-regex/issues/249">Hg issue 249</a>)</h2>
 <p>regex.escape has an additional keyword parameter <tt class="docutils literal">literal_spaces</tt>. When True, spaces are not escaped.</p>
 <p>Examples:</p>
 <pre class="code python literal-block">
diff --git a/docs/Features.rst b/docs/Features.rst
index 304f2b116b05b7da1509b8c63f67fcf7cd21c63b..13f7709da88f8bb3cc4cbb1aeb45e70ba8d4d1de 100644
--- a/docs/Features.rst
+++ b/docs/Features.rst
@@ -143,8 +143,8 @@ Additional features
 
 The issue numbers relate to the Python bug tracker, except where listed as "Hg issue".
 
-Added support for lookaround in conditional pattern (`Hg issue 163 <https://bitbucket.org/mrabarnett/mrab-regex/issues/163>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added support for lookaround in conditional pattern (`Hg issue 163 <https://github.com/mrabarnett/mrab-regex/issues/163>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The test of a conditional pattern can now be a lookaround.
 
@@ -170,8 +170,8 @@ Examples:
 
 In the first example, the lookaround matched, but the remainder of the first branch failed to match, and so the second branch was attempted, whereas in the second example, the lookaround matched, and the first branch failed to match, but the second branch was **not** attempted.
 
-Added POSIX matching (leftmost longest) (`Hg issue 150 <https://bitbucket.org/mrabarnett/mrab-regex/issues/150>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added POSIX matching (leftmost longest) (`Hg issue 150 <https://github.com/mrabarnett/mrab-regex/issues/150>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The POSIX standard for regex is to return the leftmost longest match. This can be turned on using the ``POSIX`` flag (``(?p)``).
 
@@ -192,8 +192,8 @@ Examples:
 
 Note that it will take longer to find matches because when it finds a match at a certain position, it won't return that immediately, but will keep looking to see if there's another longer match there.
 
-Added ``(?(DEFINE)...)`` (`Hg issue 152 <https://bitbucket.org/mrabarnett/mrab-regex/issues/152>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added ``(?(DEFINE)...)`` (`Hg issue 152 <https://github.com/mrabarnett/mrab-regex/issues/152>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 If there's no group called "DEFINE", then ... will be ignored, but any group definitions within it will be available.
 
@@ -204,8 +204,8 @@ Examples:
   >>> regex.search(r'(?(DEFINE)(?P<quant>\d+)(?P<item>\w+))(?&quant) (?&item)', '5 elephants')
   <regex.Match object; span=(0, 11), match='5 elephants'>
 
-Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://bitbucket.org/mrabarnett/mrab-regex/issues/153>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://github.com/mrabarnett/mrab-regex/issues/153>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ``(*PRUNE)`` discards the backtracking info up to that point. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
 
@@ -213,8 +213,8 @@ Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://bitbucke
 
 ``(*FAIL)`` causes immediate backtracking. ``(*F)`` is a permitted abbreviation.
 
-Added ``\K`` (`Hg issue 151 <https://bitbucket.org/mrabarnett/mrab-regex/issues/151>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added ``\K`` (`Hg issue 151 <https://github.com/mrabarnett/mrab-regex/issues/151>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Keeps the part of the entire match after the position where ``\K`` occurred; the part before it is discarded.
 
@@ -236,8 +236,8 @@ Examples:
   >>> m[1]
   'bcdef'
 
-Added capture subscripting for ``expandf`` and ``subf``/``subfn`` (`Hg issue 133 <https://bitbucket.org/mrabarnett/mrab-regex/issues/133>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added capture subscripting for ``expandf`` and ``subf``/``subfn`` (`Hg issue 133 <https://github.com/mrabarnett/mrab-regex/issues/133>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 You can now use subscripting to get the captures of a repeated capture group.
 
@@ -271,8 +271,8 @@ Fixed the handling of locale-sensitive regexes.
 
 The ``LOCALE`` flag is intended for legacy code and has limited support. You're still recommended to use Unicode instead.
 
-Added partial matches (`Hg issue 102 <https://bitbucket.org/mrabarnett/mrab-regex/issues/102>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added partial matches (`Hg issue 102 <https://github.com/mrabarnett/mrab-regex/issues/102>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 A partial match is one that matches up to the end of string, but that string has been truncated and you want to know whether a complete match could be possible if the string had not been truncated.
 
@@ -324,8 +324,8 @@ For example, if you wanted a user to enter a 4-digit number and check it charact
   >>> pattern.match('1233', partial=True).partial
   False
 
-``*`` operator not working correctly with sub() (`Hg issue 106 <https://bitbucket.org/mrabarnett/mrab-regex/issues/106>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+``*`` operator not working correctly with sub() (`Hg issue 106 <https://github.com/mrabarnett/mrab-regex/issues/106>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Sometimes it's not clear how zero-width matches should be handled. For example, should ``.*`` match 0 characters directly after matching >0 characters?
 
@@ -349,8 +349,8 @@ Examples:
   >>> regex.sub('(?V1).*?', '|', 'test')
   '|||||||||'
 
-Added ``capturesdict`` (`Hg issue 86 <https://bitbucket.org/mrabarnett/mrab-regex/issues/86>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Added ``capturesdict`` (`Hg issue 86 <https://github.com/mrabarnett/mrab-regex/issues/86>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ``capturesdict`` is a combination of ``groupdict`` and ``captures``:
 
@@ -374,8 +374,8 @@ Examples:
   >>> m.capturesdict()
   {'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}
 
-Allow duplicate names of groups (`Hg issue 87 <https://bitbucket.org/mrabarnett/mrab-regex/issues/87>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Allow duplicate names of groups (`Hg issue 87 <https://github.com/mrabarnett/mrab-regex/issues/87>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Group names can now be duplicated.
 
@@ -499,8 +499,8 @@ Example:
   >>> print(m.string)
   None
 
-Recursive patterns (`Hg issue 27 <https://bitbucket.org/mrabarnett/mrab-regex/issues/27>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Recursive patterns (`Hg issue 27 <https://github.com/mrabarnett/mrab-regex/issues/27>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Recursive and repeated patterns are supported.
 
@@ -545,8 +545,8 @@ Examples (in Python 3):
 
 In version 0 behaviour, it uses simple case-folding for backward compatibility with the re module.
 
-Approximate "fuzzy" matching (`Hg issue 12 <https://bitbucket.org/mrabarnett/mrab-regex/issues/12>`_, `Hg issue 41 <https://bitbucket.org/mrabarnett/mrab-regex/issues/41>`_, `Hg issue 109 <https://bitbucket.org/mrabarnett/mrab-regex/issues/109>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Approximate "fuzzy" matching (`Hg issue 12 <https://github.com/mrabarnett/mrab-regex/issues/12>`_, `Hg issue 41 <https://github.com/mrabarnett/mrab-regex/issues/41>`_, `Hg issue 109 <https://github.com/mrabarnett/mrab-regex/issues/109>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Regex usually attempts an exact match, but sometimes an approximate, or "fuzzy", match is needed, for those cases where the text being searched may contain errors in the form of inserted, deleted or substituted characters.
 
@@ -674,8 +674,8 @@ So the actual string was:
 
   'anaconda foo bar'
 
-Named lists (`Hg issue 11 <https://bitbucket.org/mrabarnett/mrab-regex/issues/11>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Named lists (`Hg issue 11 <https://github.com/mrabarnett/mrab-regex/issues/11>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ``\L<name>``
 
@@ -787,8 +787,8 @@ Examples:
   >>> regex.escape("foo!?", special_only=True)
   'foo!\\?'
 
-regex.escape (`Hg issue 249 <https://bitbucket.org/mrabarnett/mrab-regex/issues/249>`_)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+regex.escape (`Hg issue 249 <https://github.com/mrabarnett/mrab-regex/issues/249>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 regex.escape has an additional keyword parameter ``literal_spaces``. When True, spaces are not escaped.
 
diff --git a/regex.egg-info/PKG-INFO b/regex.egg-info/PKG-INFO
index 9014429356322b9422a2bb7069bc7a5adcc22ebc..5e2fc12b5ff3dbbb98079ee5c453c7693a78e0d1 100644
--- a/regex.egg-info/PKG-INFO
+++ b/regex.egg-info/PKG-INFO
@@ -1,1091 +1,1095 @@
-Metadata-Version: 1.1
-Name: regex
-Version: 2021.10.8
-Summary: Alternative regular expression module, to replace re.
-Home-page: https://bitbucket.org/mrabarnett/mrab-regex
-Author: Matthew Barnett
-Author-email: regex@mrabarnett.plus.com
-License: Apache Software License
-Description: Introduction
-        ------------
-        
-        This regex implementation is backwards-compatible with the standard 're' module, but offers additional functionality.
-        
-        Note
-        ----
-        
-        The re module's behaviour with zero-width matches changed in Python 3.7, and this module will follow that behaviour when compiled for Python 3.7.
-        
-        PyPy
-        ----
-        
-        This module is targeted at CPython. It expects that all codepoints are the same width, so it won't behave properly with PyPy outside U+0000..U+007F because PyPy stores strings as UTF-8.
-        
-        Old vs new behaviour
-        --------------------
-        
-        In order to be compatible with the re module, this module has 2 behaviours:
-        
-        * **Version 0** behaviour (old behaviour, compatible with the re module):
-        
-          Please note that the re module's behaviour may change over time, and I'll endeavour to match that behaviour in version 0.
-        
-          * Indicated by the ``VERSION0`` or ``V0`` flag, or ``(?V0)`` in the pattern.
-        
-          * Zero-width matches are not handled correctly in the re module before Python 3.7. The behaviour in those earlier versions is:
-        
-            * ``.split`` won't split a string at a zero-width match.
-        
-            * ``.sub`` will advance by one character after a zero-width match.
-        
-          * Inline flags apply to the entire pattern, and they can't be turned off.
-        
-          * Only simple sets are supported.
-        
-          * Case-insensitive matches in Unicode use simple case-folding by default.
-        
-        * **Version 1** behaviour (new behaviour, possibly different from the re module):
-        
-          * Indicated by the ``VERSION1`` or ``V1`` flag, or ``(?V1)`` in the pattern.
-        
-          * Zero-width matches are handled correctly.
-        
-          * Inline flags apply to the end of the group or pattern, and they can be turned off.
-        
-          * Nested sets and set operations are supported.
-        
-          * Case-insensitive matches in Unicode use full case-folding by default.
-        
-        If no version is specified, the regex module will default to ``regex.DEFAULT_VERSION``.
-        
-        Case-insensitive matches in Unicode
-        -----------------------------------
-        
-        The regex module supports both simple and full case-folding for case-insensitive matches in Unicode. Use of full case-folding can be turned on using the ``FULLCASE`` or ``F`` flag, or ``(?f)`` in the pattern. Please note that this flag affects how the ``IGNORECASE`` flag works; the ``FULLCASE`` flag itself does not turn on case-insensitive matching.
-        
-        In the version 0 behaviour, the flag is off by default.
-        
-        In the version 1 behaviour, the flag is on by default.
-        
-        Nested sets and set operations
-        ------------------------------
-        
-        It's not possible to support both simple sets, as used in the re module, and nested sets at the same time because of a difference in the meaning of an unescaped ``"["`` in a set.
-        
-        For example, the pattern ``[[a-z]--[aeiou]]`` is treated in the version 0 behaviour (simple sets, compatible with the re module) as:
-        
-        * Set containing "[" and the letters "a" to "z"
-        
-        * Literal "--"
-        
-        * Set containing letters "a", "e", "i", "o", "u"
-        
-        * Literal "]"
-        
-        but in the version 1 behaviour (nested sets, enhanced behaviour) as:
-        
-        * Set which is:
-        
-          * Set containing the letters "a" to "z"
-        
-        * but excluding:
-        
-          * Set containing the letters "a", "e", "i", "o", "u"
-        
-        Version 0 behaviour: only simple sets are supported.
-        
-        Version 1 behaviour: nested sets and set operations are supported.
-        
-        Flags
-        -----
-        
-        There are 2 kinds of flag: scoped and global. Scoped flags can apply to only part of a pattern and can be turned on or off; global flags apply to the entire pattern and can only be turned on.
-        
-        The scoped flags are: ``FULLCASE``, ``IGNORECASE``, ``MULTILINE``, ``DOTALL``, ``VERBOSE``, ``WORD``.
-        
-        The global flags are: ``ASCII``, ``BESTMATCH``, ``ENHANCEMATCH``, ``LOCALE``, ``POSIX``, ``REVERSE``, ``UNICODE``, ``VERSION0``, ``VERSION1``.
-        
-        If neither the ``ASCII``, ``LOCALE`` nor ``UNICODE`` flag is specified, it will default to ``UNICODE`` if the regex pattern is a Unicode string and ``ASCII`` if it's a bytestring.
-        
-        The ``ENHANCEMATCH`` flag makes fuzzy matching attempt to improve the fit of the next match that it finds.
-        
-        The ``BESTMATCH`` flag makes fuzzy matching search for the best match instead of the next match.
-        
-        Notes on named capture groups
-        -----------------------------
-        
-        All capture groups have a group number, starting from 1.
-        
-        Groups with the same group name will have the same group number, and groups with a different group name will have a different group number.
-        
-        The same name can be used by more than one group, with later captures 'overwriting' earlier captures. All of the captures of the group will be available from the ``captures`` method of the match object.
-        
-        Group numbers will be reused across different branches of a branch reset, eg. ``(?|(first)|(second))`` has only group 1. If capture groups have different group names then they will, of course, have different group numbers, eg. ``(?|(?P<foo>first)|(?P<bar>second))`` has group 1 ("foo") and group 2 ("bar").
-        
-        In the regex ``(\s+)(?|(?P<foo>[A-Z]+)|(\w+) (?P<foo>[0-9]+)`` there are 2 groups:
-        
-        * ``(\s+)`` is group 1.
-        
-        * ``(?P<foo>[A-Z]+)`` is group 2, also called "foo".
-        
-        * ``(\w+)`` is group 2 because of the branch reset.
-        
-        * ``(?P<foo>[0-9]+)`` is group 2 because it's called "foo".
-        
-        If you want to prevent ``(\w+)`` from being group 2, you need to name it (different name, different group number).
-        
-        Multithreading
-        --------------
-        
-        The regex module releases the GIL during matching on instances of the built-in (immutable) string classes, enabling other Python threads to run concurrently. It is also possible to force the regex module to release the GIL during matching by calling the matching methods with the keyword argument ``concurrent=True``. The behaviour is undefined if the string changes during matching, so use it *only* when it is guaranteed that that won't happen.
-        
-        Unicode
-        -------
-        
-        This module supports Unicode 14.0.0.
-        
-        Full Unicode case-folding is supported.
-        
-        Additional features
-        -------------------
-        
-        The issue numbers relate to the Python bug tracker, except where listed as "Hg issue".
-        
-        Added support for lookaround in conditional pattern (`Hg issue 163 <https://bitbucket.org/mrabarnett/mrab-regex/issues/163>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The test of a conditional pattern can now be a lookaround.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.match(r'(?(?=\d)\d+|\w+)', '123abc')
-          <regex.Match object; span=(0, 3), match='123'>
-          >>> regex.match(r'(?(?=\d)\d+|\w+)', 'abc123')
-          <regex.Match object; span=(0, 6), match='abc123'>
-        
-        This is not quite the same as putting a lookaround in the first branch of a pair of alternatives.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> print(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc'))
-          <regex.Match object; span=(0, 6), match='123abc'>
-          >>> print(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'))
-          None
-        
-        In the first example, the lookaround matched, but the remainder of the first branch failed to match, and so the second branch was attempted, whereas in the second example, the lookaround matched, and the first branch failed to match, but the second branch was **not** attempted.
-        
-        Added POSIX matching (leftmost longest) (`Hg issue 150 <https://bitbucket.org/mrabarnett/mrab-regex/issues/150>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The POSIX standard for regex is to return the leftmost longest match. This can be turned on using the ``POSIX`` flag (``(?p)``).
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> # Normal matching.
-          >>> regex.search(r'Mr|Mrs', 'Mrs')
-          <regex.Match object; span=(0, 2), match='Mr'>
-          >>> regex.search(r'one(self)?(selfsufficient)?', 'oneselfsufficient')
-          <regex.Match object; span=(0, 7), match='oneself'>
-          >>> # POSIX matching.
-          >>> regex.search(r'(?p)Mr|Mrs', 'Mrs')
-          <regex.Match object; span=(0, 3), match='Mrs'>
-          >>> regex.search(r'(?p)one(self)?(selfsufficient)?', 'oneselfsufficient')
-          <regex.Match object; span=(0, 17), match='oneselfsufficient'>
-        
-        Note that it will take longer to find matches because when it finds a match at a certain position, it won't return that immediately, but will keep looking to see if there's another longer match there.
-        
-        Added ``(?(DEFINE)...)`` (`Hg issue 152 <https://bitbucket.org/mrabarnett/mrab-regex/issues/152>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        If there's no group called "DEFINE", then ... will be ignored, but any group definitions within it will be available.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.search(r'(?(DEFINE)(?P<quant>\d+)(?P<item>\w+))(?&quant) (?&item)', '5 elephants')
-          <regex.Match object; span=(0, 11), match='5 elephants'>
-        
-        Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://bitbucket.org/mrabarnett/mrab-regex/issues/153>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``(*PRUNE)`` discards the backtracking info up to that point. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
-        
-        ``(*SKIP)`` is similar to ``(*PRUNE)``, except that it also sets where in the text the next attempt to match will start. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
-        
-        ``(*FAIL)`` causes immediate backtracking. ``(*F)`` is a permitted abbreviation.
-        
-        Added ``\K`` (`Hg issue 151 <https://bitbucket.org/mrabarnett/mrab-regex/issues/151>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Keeps the part of the entire match after the position where ``\K`` occurred; the part before it is discarded.
-        
-        It does not affect what capture groups return.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search(r'(\w\w\K\w\w\w)', 'abcdef')
-          >>> m[0]
-          'cde'
-          >>> m[1]
-          'abcde'
-          >>>
-          >>> m = regex.search(r'(?r)(\w\w\K\w\w\w)', 'abcdef')
-          >>> m[0]
-          'bc'
-          >>> m[1]
-          'bcdef'
-        
-        Added capture subscripting for ``expandf`` and ``subf``/``subfn`` (`Hg issue 133 <https://bitbucket.org/mrabarnett/mrab-regex/issues/133>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        You can now use subscripting to get the captures of a repeated capture group.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.match(r"(\w)+", "abc")
-          >>> m.expandf("{1}")
-          'c'
-          >>> m.expandf("{1[0]} {1[1]} {1[2]}")
-          'a b c'
-          >>> m.expandf("{1[-1]} {1[-2]} {1[-3]}")
-          'c b a'
-          >>>
-          >>> m = regex.match(r"(?P<letter>\w)+", "abc")
-          >>> m.expandf("{letter}")
-          'c'
-          >>> m.expandf("{letter[0]} {letter[1]} {letter[2]}")
-          'a b c'
-          >>> m.expandf("{letter[-1]} {letter[-2]} {letter[-3]}")
-          'c b a'
-        
-        Added support for referring to a group by number using ``(?P=...)``.
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        This is in addition to the existing ``\g<...>``.
-        
-        Fixed the handling of locale-sensitive regexes.
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The ``LOCALE`` flag is intended for legacy code and has limited support. You're still recommended to use Unicode instead.
-        
-        Added partial matches (`Hg issue 102 <https://bitbucket.org/mrabarnett/mrab-regex/issues/102>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        A partial match is one that matches up to the end of string, but that string has been truncated and you want to know whether a complete match could be possible if the string had not been truncated.
-        
-        Partial matches are supported by ``match``, ``search``, ``fullmatch`` and ``finditer`` with the ``partial`` keyword argument.
-        
-        Match objects have a ``partial`` attribute, which is ``True`` if it's a partial match.
-        
-        For example, if you wanted a user to enter a 4-digit number and check it character by character as it was being entered:
-        
-        .. sourcecode:: python
-        
-          >>> pattern = regex.compile(r'\d{4}')
-        
-          >>> # Initially, nothing has been entered:
-          >>> print(pattern.fullmatch('', partial=True))
-          <regex.Match object; span=(0, 0), match='', partial=True>
-        
-          >>> # An empty string is OK, but it's only a partial match.
-          >>> # The user enters a letter:
-          >>> print(pattern.fullmatch('a', partial=True))
-          None
-          >>> # It'll never match.
-        
-          >>> # The user deletes that and enters a digit:
-          >>> print(pattern.fullmatch('1', partial=True))
-          <regex.Match object; span=(0, 1), match='1', partial=True>
-          >>> # It matches this far, but it's only a partial match.
-        
-          >>> # The user enters 2 more digits:
-          >>> print(pattern.fullmatch('123', partial=True))
-          <regex.Match object; span=(0, 3), match='123', partial=True>
-          >>> # It matches this far, but it's only a partial match.
-        
-          >>> # The user enters another digit:
-          >>> print(pattern.fullmatch('1234', partial=True))
-          <regex.Match object; span=(0, 4), match='1234'>
-          >>> # It's a complete match.
-        
-          >>> # If the user enters another digit:
-          >>> print(pattern.fullmatch('12345', partial=True))
-          None
-          >>> # It's no longer a match.
-        
-          >>> # This is a partial match:
-          >>> pattern.match('123', partial=True).partial
-          True
-        
-          >>> # This is a complete match:
-          >>> pattern.match('1233', partial=True).partial
-          False
-        
-        ``*`` operator not working correctly with sub() (`Hg issue 106 <https://bitbucket.org/mrabarnett/mrab-regex/issues/106>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Sometimes it's not clear how zero-width matches should be handled. For example, should ``.*`` match 0 characters directly after matching >0 characters?
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          # Python 3.7 and later
-          >>> regex.sub('.*', 'x', 'test')
-          'xx'
-          >>> regex.sub('.*?', '|', 'test')
-          '|||||||||'
-        
-          # Python 3.6 and earlier
-          >>> regex.sub('(?V0).*', 'x', 'test')
-          'x'
-          >>> regex.sub('(?V1).*', 'x', 'test')
-          'xx'
-          >>> regex.sub('(?V0).*?', '|', 'test')
-          '|t|e|s|t|'
-          >>> regex.sub('(?V1).*?', '|', 'test')
-          '|||||||||'
-        
-        Added ``capturesdict`` (`Hg issue 86 <https://bitbucket.org/mrabarnett/mrab-regex/issues/86>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``capturesdict`` is a combination of ``groupdict`` and ``captures``:
-        
-        ``groupdict`` returns a dict of the named groups and the last capture of those groups.
-        
-        ``captures`` returns a list of all the captures of a group
-        
-        ``capturesdict`` returns a dict of the named groups and lists of all the captures of those groups.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.match(r"(?:(?P<word>\w+) (?P<digits>\d+)\n)+", "one 1\ntwo 2\nthree 3\n")
-          >>> m.groupdict()
-          {'word': 'three', 'digits': '3'}
-          >>> m.captures("word")
-          ['one', 'two', 'three']
-          >>> m.captures("digits")
-          ['1', '2', '3']
-          >>> m.capturesdict()
-          {'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}
-        
-        Allow duplicate names of groups (`Hg issue 87 <https://bitbucket.org/mrabarnett/mrab-regex/issues/87>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Group names can now be duplicated.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> # With optional groups:
-          >>>
-          >>> # Both groups capture, the second capture 'overwriting' the first.
-          >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or second")
-          >>> m.group("item")
-          'second'
-          >>> m.captures("item")
-          ['first', 'second']
-          >>> # Only the second group captures.
-          >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", " or second")
-          >>> m.group("item")
-          'second'
-          >>> m.captures("item")
-          ['second']
-          >>> # Only the first group captures.
-          >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or ")
-          >>> m.group("item")
-          'first'
-          >>> m.captures("item")
-          ['first']
-          >>>
-          >>> # With mandatory groups:
-          >>>
-          >>> # Both groups capture, the second capture 'overwriting' the first.
-          >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)?", "first or second")
-          >>> m.group("item")
-          'second'
-          >>> m.captures("item")
-          ['first', 'second']
-          >>> # Again, both groups capture, the second capture 'overwriting' the first.
-          >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", " or second")
-          >>> m.group("item")
-          'second'
-          >>> m.captures("item")
-          ['', 'second']
-          >>> # And yet again, both groups capture, the second capture 'overwriting' the first.
-          >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or ")
-          >>> m.group("item")
-          ''
-          >>> m.captures("item")
-          ['first', '']
-        
-        Added ``fullmatch`` (`issue #16203 <https://bugs.python.org/issue16203>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``fullmatch`` behaves like ``match``, except that it must match all of the string.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> print(regex.fullmatch(r"abc", "abc").span())
-          (0, 3)
-          >>> print(regex.fullmatch(r"abc", "abcx"))
-          None
-          >>> print(regex.fullmatch(r"abc", "abcx", endpos=3).span())
-          (0, 3)
-          >>> print(regex.fullmatch(r"abc", "xabcy", pos=1, endpos=4).span())
-          (1, 4)
-          >>>
-          >>> regex.match(r"a.*?", "abcd").group(0)
-          'a'
-          >>> regex.fullmatch(r"a.*?", "abcd").group(0)
-          'abcd'
-        
-        Added ``subf`` and ``subfn``
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``subf`` and ``subfn`` are alternatives to ``sub`` and ``subn`` respectively. When passed a replacement string, they treat it as a format string.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", "foo bar")
-          'foo bar => bar foo'
-          >>> regex.subf(r"(?P<word1>\w+) (?P<word2>\w+)", "{word2} {word1}", "foo bar")
-          'bar foo'
-        
-        Added ``expandf`` to match object
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``expandf`` is an alternative to ``expand``. When passed a replacement string, it treats it as a format string.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.match(r"(\w+) (\w+)", "foo bar")
-          >>> m.expandf("{0} => {2} {1}")
-          'foo bar => bar foo'
-          >>>
-          >>> m = regex.match(r"(?P<word1>\w+) (?P<word2>\w+)", "foo bar")
-          >>> m.expandf("{word2} {word1}")
-          'bar foo'
-        
-        Detach searched string
-        ^^^^^^^^^^^^^^^^^^^^^^
-        
-        A match object contains a reference to the string that was searched, via its ``string`` attribute. The ``detach_string`` method will 'detach' that string, making it available for garbage collection, which might save valuable memory if that string is very large.
-        
-        Example:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search(r"\w+", "Hello world")
-          >>> print(m.group())
-          Hello
-          >>> print(m.string)
-          Hello world
-          >>> m.detach_string()
-          >>> print(m.group())
-          Hello
-          >>> print(m.string)
-          None
-        
-        Recursive patterns (`Hg issue 27 <https://bitbucket.org/mrabarnett/mrab-regex/issues/27>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Recursive and repeated patterns are supported.
-        
-        ``(?R)`` or ``(?0)`` tries to match the entire regex recursively. ``(?1)``, ``(?2)``, etc, try to match the relevant capture group.
-        
-        ``(?&name)`` tries to match the named capture group.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.match(r"(Tarzan|Jane) loves (?1)", "Tarzan loves Jane").groups()
-          ('Tarzan',)
-          >>> regex.match(r"(Tarzan|Jane) loves (?1)", "Jane loves Tarzan").groups()
-          ('Jane',)
-        
-          >>> m = regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")
-          >>> m.group(0, 1, 2)
-          ('kayak', 'k', None)
-        
-        The first two examples show how the subpattern within the capture group is reused, but is _not_ itself a capture group. In other words, ``"(Tarzan|Jane) loves (?1)"`` is equivalent to ``"(Tarzan|Jane) loves (?:Tarzan|Jane)"``.
-        
-        It's possible to backtrack into a recursed or repeated group.
-        
-        You can't call a group if there is more than one group with that group name or group number (``"ambiguous group reference"``).
-        
-        The alternative forms ``(?P>name)`` and ``(?P&name)`` are also supported.
-        
-        Full Unicode case-folding is supported.
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        In version 1 behaviour, the regex module uses full case-folding when performing case-insensitive matches in Unicode.
-        
-        Examples (in Python 3):
-        
-        .. sourcecode:: python
-        
-          >>> regex.match(r"(?iV1)strasse", "stra\N{LATIN SMALL LETTER SHARP S}e").span()
-          (0, 6)
-          >>> regex.match(r"(?iV1)stra\N{LATIN SMALL LETTER SHARP S}e", "STRASSE").span()
-          (0, 7)
-        
-        In version 0 behaviour, it uses simple case-folding for backward compatibility with the re module.
-        
-        Approximate "fuzzy" matching (`Hg issue 12 <https://bitbucket.org/mrabarnett/mrab-regex/issues/12>`_, `Hg issue 41 <https://bitbucket.org/mrabarnett/mrab-regex/issues/41>`_, `Hg issue 109 <https://bitbucket.org/mrabarnett/mrab-regex/issues/109>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Regex usually attempts an exact match, but sometimes an approximate, or "fuzzy", match is needed, for those cases where the text being searched may contain errors in the form of inserted, deleted or substituted characters.
-        
-        A fuzzy regex specifies which types of errors are permitted, and, optionally, either the minimum and maximum or only the maximum permitted number of each type. (You cannot specify only a minimum.)
-        
-        The 3 types of error are:
-        
-        * Insertion, indicated by "i"
-        
-        * Deletion, indicated by "d"
-        
-        * Substitution, indicated by "s"
-        
-        In addition, "e" indicates any type of error.
-        
-        The fuzziness of a regex item is specified between "{" and "}" after the item.
-        
-        Examples:
-        
-        * ``foo`` match "foo" exactly
-        
-        * ``(?:foo){i}`` match "foo", permitting insertions
-        
-        * ``(?:foo){d}`` match "foo", permitting deletions
-        
-        * ``(?:foo){s}`` match "foo", permitting substitutions
-        
-        * ``(?:foo){i,s}`` match "foo", permitting insertions and substitutions
-        
-        * ``(?:foo){e}`` match "foo", permitting errors
-        
-        If a certain type of error is specified, then any type not specified will **not** be permitted.
-        
-        In the following examples I'll omit the item and write only the fuzziness:
-        
-        * ``{d<=3}`` permit at most 3 deletions, but no other types
-        
-        * ``{i<=1,s<=2}`` permit at most 1 insertion and at most 2 substitutions, but no deletions
-        
-        * ``{1<=e<=3}`` permit at least 1 and at most 3 errors
-        
-        * ``{i<=2,d<=2,e<=3}`` permit at most 2 insertions, at most 2 deletions, at most 3 errors in total, but no substitutions
-        
-        It's also possible to state the costs of each type of error and the maximum permitted total cost.
-        
-        Examples:
-        
-        * ``{2i+2d+1s<=4}`` each insertion costs 2, each deletion costs 2, each substitution costs 1, the total cost must not exceed 4
-        
-        * ``{i<=1,d<=1,s<=1,2i+2d+1s<=4}`` at most 1 insertion, at most 1 deletion, at most 1 substitution; each insertion costs 2, each deletion costs 2, each substitution costs 1, the total cost must not exceed 4
-        
-        You can also use "<" instead of "<=" if you want an exclusive minimum or maximum.
-        
-        You can add a test to perform on a character that's substituted or inserted.
-        
-        Examples:
-        
-        * ``{s<=2:[a-z]}`` at most 2 substitutions, which must be in the character set ``[a-z]``.
-        
-        * ``{s<=2,i<=3:\d}`` at most 2 substitutions, at most 3 insertions, which must be digits.
-        
-        By default, fuzzy matching searches for the first match that meets the given constraints. The ``ENHANCEMATCH`` flag will cause it to attempt to improve the fit (i.e. reduce the number of errors) of the match that it has found.
-        
-        The ``BESTMATCH`` flag will make it search for the best match instead.
-        
-        Further examples to note:
-        
-        * ``regex.search("(dog){e}", "cat and dog")[1]`` returns ``"cat"`` because that matches ``"dog"`` with 3 errors (an unlimited number of errors is permitted).
-        
-        * ``regex.search("(dog){e<=1}", "cat and dog")[1]`` returns ``" dog"`` (with a leading space) because that matches ``"dog"`` with 1 error, which is within the limit.
-        
-        * ``regex.search("(?e)(dog){e<=1}", "cat and dog")[1]`` returns ``"dog"`` (without a leading space) because the fuzzy search matches ``" dog"`` with 1 error, which is within the limit, and the ``(?e)`` then it attempts a better fit.
-        
-        In the first two examples there are perfect matches later in the string, but in neither case is it the first possible match.
-        
-        The match object has an attribute ``fuzzy_counts`` which gives the total number of substitutions, insertions and deletions.
-        
-        .. sourcecode:: python
-        
-          >>> # A 'raw' fuzzy match:
-          >>> regex.fullmatch(r"(?:cats|cat){e<=1}", "cat").fuzzy_counts
-          (0, 0, 1)
-          >>> # 0 substitutions, 0 insertions, 1 deletion.
-        
-          >>> # A better match might be possible if the ENHANCEMATCH flag used:
-          >>> regex.fullmatch(r"(?e)(?:cats|cat){e<=1}", "cat").fuzzy_counts
-          (0, 0, 0)
-          >>> # 0 substitutions, 0 insertions, 0 deletions.
-        
-        The match object also has an attribute ``fuzzy_changes`` which gives a tuple of the positions of the substitutions, insertions and deletions.
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search('(fuu){i<=2,d<=2,e<=5}', 'anaconda foo bar')
-          >>> m
-          <regex.Match object; span=(7, 10), match='a f', fuzzy_counts=(0, 2, 2)>
-          >>> m.fuzzy_changes
-          ([], [7, 8], [10, 11])
-        
-        What this means is that if the matched part of the string had been:
-        
-        .. sourcecode:: python
-        
-          'anacondfuuoo bar'
-        
-        it would've been an exact match.
-        
-        However, there were insertions at positions 7 and 8:
-        
-        .. sourcecode:: python
-        
-          'anaconda fuuoo bar'
-                  ^^
-        
-        and deletions at positions 10 and 11:
-        
-        .. sourcecode:: python
-        
-          'anaconda f~~oo bar'
-                     ^^
-        
-        So the actual string was:
-        
-        .. sourcecode:: python
-        
-          'anaconda foo bar'
-        
-        Named lists (`Hg issue 11 <https://bitbucket.org/mrabarnett/mrab-regex/issues/11>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``\L<name>``
-        
-        There are occasions where you may want to include a list (actually, a set) of options in a regex.
-        
-        One way is to build the pattern like this:
-        
-        .. sourcecode:: python
-        
-          >>> p = regex.compile(r"first|second|third|fourth|fifth")
-        
-        but if the list is large, parsing the resulting regex can take considerable time, and care must also be taken that the strings are properly escaped and properly ordered, for example, "cats" before "cat".
-        
-        The new alternative is to use a named list:
-        
-        .. sourcecode:: python
-        
-          >>> option_set = ["first", "second", "third", "fourth", "fifth"]
-          >>> p = regex.compile(r"\L<options>", options=option_set)
-        
-        The order of the items is irrelevant, they are treated as a set. The named lists are available as the ``.named_lists`` attribute of the pattern object :
-        
-        .. sourcecode:: python
-        
-          >>> print(p.named_lists)
-          # Python 3
-          {'options': frozenset({'fifth', 'first', 'fourth', 'second', 'third'})}
-          # Python 2
-          {'options': frozenset(['fifth', 'fourth', 'second', 'third', 'first'])}
-        
-        If there are any unused keyword arguments, ``ValueError`` will be raised unless you tell it otherwise:
-        
-        .. sourcecode:: python
-        
-          >>> option_set = ["first", "second", "third", "fourth", "fifth"]
-          >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[])
-          Traceback (most recent call last):
-            File "<stdin>", line 1, in <module>
-            File "C:\Python37\lib\site-packages\regex\regex.py", line 348, in compile
-              return _compile(pattern, flags, ignore_unused, kwargs)
-            File "C:\Python37\lib\site-packages\regex\regex.py", line 585, in _compile
-              raise ValueError('unused keyword argument {!a}'.format(any_one))
-          ValueError: unused keyword argument 'other_options'
-          >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=True)
-          >>>
-        
-        Start and end of word
-        ^^^^^^^^^^^^^^^^^^^^^
-        
-        ``\m`` matches at the start of a word.
-        
-        ``\M`` matches at the end of a word.
-        
-        Compare with ``\b``, which matches at the start or end of a word.
-        
-        Unicode line separators
-        ^^^^^^^^^^^^^^^^^^^^^^^
-        
-        Normally the only line separator is ``\n`` (``\x0A``), but if the ``WORD`` flag is turned on then the line separators are ``\x0D\x0A``, ``\x0A``, ``\x0B``, ``\x0C`` and ``\x0D``, plus ``\x85``, ``\u2028`` and ``\u2029`` when working with Unicode.
-        
-        This affects the regex dot ``"."``, which, with the ``DOTALL`` flag turned off, matches any character except a line separator. It also affects the line anchors ``^`` and ``$`` (in multiline mode).
-        
-        Set operators
-        ^^^^^^^^^^^^^
-        
-        **Version 1 behaviour only**
-        
-        Set operators have been added, and a set ``[...]`` can include nested sets.
-        
-        The operators, in order of increasing precedence, are:
-        
-        * ``||`` for union ("x||y" means "x or y")
-        
-        * ``~~`` (double tilde) for symmetric difference ("x~~y" means "x or y, but not both")
-        
-        * ``&&`` for intersection ("x&&y" means "x and y")
-        
-        * ``--`` (double dash) for difference ("x--y" means "x but not y")
-        
-        Implicit union, ie, simple juxtaposition like in ``[ab]``, has the highest precedence. Thus, ``[ab&&cd]`` is the same as ``[[a||b]&&[c||d]]``.
-        
-        Examples:
-        
-        * ``[ab]`` # Set containing 'a' and 'b'
-        
-        * ``[a-z]`` # Set containing 'a' .. 'z'
-        
-        * ``[[a-z]--[qw]]`` # Set containing 'a' .. 'z', but not 'q' or 'w'
-        
-        * ``[a-z--qw]`` # Same as above
-        
-        * ``[\p{L}--QW]`` # Set containing all letters except 'Q' and 'W'
-        
-        * ``[\p{N}--[0-9]]`` # Set containing all numbers except '0' .. '9'
-        
-        * ``[\p{ASCII}&&\p{Letter}]`` # Set containing all characters which are ASCII and letter
-        
-        regex.escape (`issue #2650 <https://bugs.python.org/issue2650>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        regex.escape has an additional keyword parameter ``special_only``. When True, only 'special' regex characters, such as '?', are escaped.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.escape("foo!?", special_only=False)
-          'foo\\!\\?'
-          >>> regex.escape("foo!?", special_only=True)
-          'foo!\\?'
-        
-        regex.escape (`Hg issue 249 <https://bitbucket.org/mrabarnett/mrab-regex/issues/249>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        regex.escape has an additional keyword parameter ``literal_spaces``. When True, spaces are not escaped.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.escape("foo bar!?", literal_spaces=False)
-          'foo\\ bar!\\?'
-          >>> regex.escape("foo bar!?", literal_spaces=True)
-          'foo bar!\\?'
-        
-        Repeated captures (`issue #7132 <https://bugs.python.org/issue7132>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        A match object has additional methods which return information on all the successful matches of a repeated capture group. These methods are:
-        
-        * ``matchobject.captures([group1, ...])``
-        
-          * Returns a list of the strings matched in a group or groups. Compare with ``matchobject.group([group1, ...])``.
-        
-        * ``matchobject.starts([group])``
-        
-          * Returns a list of the start positions. Compare with ``matchobject.start([group])``.
-        
-        * ``matchobject.ends([group])``
-        
-          * Returns a list of the end positions. Compare with ``matchobject.end([group])``.
-        
-        * ``matchobject.spans([group])``
-        
-          * Returns a list of the spans. Compare with ``matchobject.span([group])``.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search(r"(\w{3})+", "123456789")
-          >>> m.group(1)
-          '789'
-          >>> m.captures(1)
-          ['123', '456', '789']
-          >>> m.start(1)
-          6
-          >>> m.starts(1)
-          [0, 3, 6]
-          >>> m.end(1)
-          9
-          >>> m.ends(1)
-          [3, 6, 9]
-          >>> m.span(1)
-          (6, 9)
-          >>> m.spans(1)
-          [(0, 3), (3, 6), (6, 9)]
-        
-        Atomic grouping (`issue #433030 <https://bugs.python.org/issue433030>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``(?>...)``
-        
-        If the following pattern subsequently fails, then the subpattern as a whole will fail.
-        
-        Possessive quantifiers.
-        ^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``(?:...)?+`` ; ``(?:...)*+`` ; ``(?:...)++`` ; ``(?:...){min,max}+``
-        
-        The subpattern is matched up to 'max' times. If the following pattern subsequently fails, then all of the repeated subpatterns will fail as a whole. For example, ``(?:...)++`` is equivalent to ``(?>(?:...)+)``.
-        
-        Scoped flags (`issue #433028 <https://bugs.python.org/issue433028>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``(?flags-flags:...)``
-        
-        The flags will apply only to the subpattern. Flags can be turned on or off.
-        
-        Definition of 'word' character (`issue #1693050 <https://bugs.python.org/issue1693050>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The definition of a 'word' character has been expanded for Unicode. It now conforms to the Unicode specification at ``http://www.unicode.org/reports/tr29/``.
-        
-        Variable-length lookbehind
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        A lookbehind can match a variable-length string.
-        
-        Flags argument for regex.split, regex.sub and regex.subn (`issue #3482 <https://bugs.python.org/issue3482>`_)
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``regex.split``, ``regex.sub`` and ``regex.subn`` support a 'flags' argument.
-        
-        Pos and endpos arguments for regex.sub and regex.subn
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``regex.sub`` and ``regex.subn`` support 'pos' and 'endpos' arguments.
-        
-        'Overlapped' argument for regex.findall and regex.finditer
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``regex.findall`` and ``regex.finditer`` support an 'overlapped' flag which permits overlapped matches.
-        
-        Splititer
-        ^^^^^^^^^
-        
-        ``regex.splititer`` has been added. It's a generator equivalent of ``regex.split``.
-        
-        Subscripting for groups
-        ^^^^^^^^^^^^^^^^^^^^^^^
-        
-        A match object accepts access to the captured groups via subscripting and slicing:
-        
-        .. sourcecode:: python
-        
-          >>> m = regex.search(r"(?P<before>.*?)(?P<num>\d+)(?P<after>.*)", "pqr123stu")
-          >>> print(m["before"])
-          pqr
-          >>> print(len(m))
-          4
-          >>> print(m[:])
-          ('pqr123stu', 'pqr', '123', 'stu')
-        
-        Named groups
-        ^^^^^^^^^^^^
-        
-        Groups can be named with ``(?<name>...)`` as well as the current ``(?P<name>...)``.
-        
-        Group references
-        ^^^^^^^^^^^^^^^^
-        
-        Groups can be referenced within a pattern with ``\g<name>``. This also allows there to be more than 99 groups.
-        
-        Named characters
-        ^^^^^^^^^^^^^^^^
-        
-        ``\N{name}``
-        
-        Named characters are supported. (Note: only those known by Python's Unicode database are supported.)
-        
-        Unicode codepoint properties, including scripts and blocks
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``\p{property=value}``; ``\P{property=value}``; ``\p{value}`` ; ``\P{value}``
-        
-        Many Unicode properties are supported, including blocks and scripts. ``\p{property=value}`` or ``\p{property:value}`` matches a character whose property ``property`` has value ``value``. The inverse of ``\p{property=value}`` is ``\P{property=value}`` or ``\p{^property=value}``.
-        
-        If the short form ``\p{value}`` is used, the properties are checked in the order: ``General_Category``, ``Script``, ``Block``, binary property:
-        
-        * ``Latin``, the 'Latin' script (``Script=Latin``).
-        
-        * ``BasicLatin``, the 'BasicLatin' block (``Block=BasicLatin``).
-        
-        * ``Alphabetic``, the 'Alphabetic' binary property (``Alphabetic=Yes``).
-        
-        A short form starting with ``Is`` indicates a script or binary property:
-        
-        * ``IsLatin``, the 'Latin' script (``Script=Latin``).
-        
-        * ``IsAlphabetic``, the 'Alphabetic' binary property (``Alphabetic=Yes``).
-        
-        A short form starting with ``In`` indicates a block property:
-        
-        * ``InBasicLatin``, the 'BasicLatin' block (``Block=BasicLatin``).
-        
-        POSIX character classes
-        ^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``[[:alpha:]]``; ``[[:^alpha:]]``
-        
-        POSIX character classes are supported. These are normally treated as an alternative form of ``\p{...}``.
-        
-        The exceptions are ``alnum``, ``digit``, ``punct`` and ``xdigit``, whose definitions are different from those of Unicode.
-        
-        ``[[:alnum:]]`` is equivalent to ``\p{posix_alnum}``.
-        
-        ``[[:digit:]]`` is equivalent to ``\p{posix_digit}``.
-        
-        ``[[:punct:]]`` is equivalent to ``\p{posix_punct}``.
-        
-        ``[[:xdigit:]]`` is equivalent to ``\p{posix_xdigit}``.
-        
-        Search anchor
-        ^^^^^^^^^^^^^
-        
-        ``\G``
-        
-        A search anchor has been added. It matches at the position where each search started/continued and can be used for contiguous matches or in negative variable-length lookbehinds to limit how far back the lookbehind goes:
-        
-        .. sourcecode:: python
-        
-          >>> regex.findall(r"\w{2}", "abcd ef")
-          ['ab', 'cd', 'ef']
-          >>> regex.findall(r"\G\w{2}", "abcd ef")
-          ['ab', 'cd']
-        
-        * The search starts at position 0 and matches 2 letters 'ab'.
-        
-        * The search continues at position 2 and matches 2 letters 'cd'.
-        
-        * The search continues at position 4 and fails to match any letters.
-        
-        * The anchor stops the search start position from being advanced, so there are no more results.
-        
-        Reverse searching
-        ^^^^^^^^^^^^^^^^^
-        
-        Searches can now work backwards:
-        
-        .. sourcecode:: python
-        
-          >>> regex.findall(r".", "abc")
-          ['a', 'b', 'c']
-          >>> regex.findall(r"(?r).", "abc")
-          ['c', 'b', 'a']
-        
-        Note: the result of a reverse search is not necessarily the reverse of a forward search:
-        
-        .. sourcecode:: python
-        
-          >>> regex.findall(r"..", "abcde")
-          ['ab', 'cd']
-          >>> regex.findall(r"(?r)..", "abcde")
-          ['de', 'bc']
-        
-        Matching a single grapheme
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        ``\X``
-        
-        The grapheme matcher is supported. It now conforms to the Unicode specification at ``http://www.unicode.org/reports/tr29/``.
-        
-        Branch reset
-        ^^^^^^^^^^^^
-        
-        ``(?|...|...)``
-        
-        Capture group numbers will be reused across the alternatives, but groups with different names will have different group numbers.
-        
-        Examples:
-        
-        .. sourcecode:: python
-        
-          >>> regex.match(r"(?|(first)|(second))", "first").groups()
-          ('first',)
-          >>> regex.match(r"(?|(first)|(second))", "second").groups()
-          ('second',)
-        
-        Note that there is only one group.
-        
-        Default Unicode word boundary
-        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-        
-        The ``WORD`` flag changes the definition of a 'word boundary' to that of a default Unicode word boundary. This applies to ``\b`` and ``\B``.
-        
-        Timeout (Python 3)
-        ^^^^^^^^^^^^^^^^^^
-        
-        The matching methods and functions support timeouts. The timeout (in seconds) applies to the entire operation:
-        
-        .. sourcecode:: python
-        
-          >>> from time import sleep
-          >>>
-          >>> def fast_replace(m):
-          ...     return 'X'
-          ...
-          >>> def slow_replace(m):
-          ...     sleep(0.5)
-          ...     return 'X'
-          ...
-          >>> regex.sub(r'[a-z]', fast_replace, 'abcde', timeout=2)
-          'XXXXX'
-          >>> regex.sub(r'[a-z]', slow_replace, 'abcde', timeout=2)
-          Traceback (most recent call last):
-            File "<stdin>", line 1, in <module>
-            File "C:\Python37\lib\site-packages\regex\regex.py", line 276, in sub
-              endpos, concurrent, timeout)
-          TimeoutError: regex timed out
-        
-Platform: UNKNOWN
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3.6
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Topic :: Scientific/Engineering :: Information Analysis
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Classifier: Topic :: Text Processing
-Classifier: Topic :: Text Processing :: General
+Metadata-Version: 2.1
+Name: regex
+Version: 2021.11.10
+Summary: Alternative regular expression module, to replace re.
+Home-page: https://github.com/mrabarnett/mrab-regex
+Author: Matthew Barnett
+Author-email: regex@mrabarnett.plus.com
+License: Apache Software License
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Text Processing
+Classifier: Topic :: Text Processing :: General
+Description-Content-Type: text/x-rst
+License-File: LICENSE.txt
+
+Introduction
+------------
+
+This regex implementation is backwards-compatible with the standard 're' module, but offers additional functionality.
+
+Note
+----
+
+The re module's behaviour with zero-width matches changed in Python 3.7, and this module will follow that behaviour when compiled for Python 3.7.
+
+PyPy
+----
+
+This module is targeted at CPython. It expects that all codepoints are the same width, so it won't behave properly with PyPy outside U+0000..U+007F because PyPy stores strings as UTF-8.
+
+Old vs new behaviour
+--------------------
+
+In order to be compatible with the re module, this module has 2 behaviours:
+
+* **Version 0** behaviour (old behaviour, compatible with the re module):
+
+  Please note that the re module's behaviour may change over time, and I'll endeavour to match that behaviour in version 0.
+
+  * Indicated by the ``VERSION0`` or ``V0`` flag, or ``(?V0)`` in the pattern.
+
+  * Zero-width matches are not handled correctly in the re module before Python 3.7. The behaviour in those earlier versions is:
+
+    * ``.split`` won't split a string at a zero-width match.
+
+    * ``.sub`` will advance by one character after a zero-width match.
+
+  * Inline flags apply to the entire pattern, and they can't be turned off.
+
+  * Only simple sets are supported.
+
+  * Case-insensitive matches in Unicode use simple case-folding by default.
+
+* **Version 1** behaviour (new behaviour, possibly different from the re module):
+
+  * Indicated by the ``VERSION1`` or ``V1`` flag, or ``(?V1)`` in the pattern.
+
+  * Zero-width matches are handled correctly.
+
+  * Inline flags apply to the end of the group or pattern, and they can be turned off.
+
+  * Nested sets and set operations are supported.
+
+  * Case-insensitive matches in Unicode use full case-folding by default.
+
+If no version is specified, the regex module will default to ``regex.DEFAULT_VERSION``.
+
+Case-insensitive matches in Unicode
+-----------------------------------
+
+The regex module supports both simple and full case-folding for case-insensitive matches in Unicode. Use of full case-folding can be turned on using the ``FULLCASE`` or ``F`` flag, or ``(?f)`` in the pattern. Please note that this flag affects how the ``IGNORECASE`` flag works; the ``FULLCASE`` flag itself does not turn on case-insensitive matching.
+
+In the version 0 behaviour, the flag is off by default.
+
+In the version 1 behaviour, the flag is on by default.
+
+Nested sets and set operations
+------------------------------
+
+It's not possible to support both simple sets, as used in the re module, and nested sets at the same time because of a difference in the meaning of an unescaped ``"["`` in a set.
+
+For example, the pattern ``[[a-z]--[aeiou]]`` is treated in the version 0 behaviour (simple sets, compatible with the re module) as:
+
+* Set containing "[" and the letters "a" to "z"
+
+* Literal "--"
+
+* Set containing letters "a", "e", "i", "o", "u"
+
+* Literal "]"
+
+but in the version 1 behaviour (nested sets, enhanced behaviour) as:
+
+* Set which is:
+
+  * Set containing the letters "a" to "z"
+
+* but excluding:
+
+  * Set containing the letters "a", "e", "i", "o", "u"
+
+Version 0 behaviour: only simple sets are supported.
+
+Version 1 behaviour: nested sets and set operations are supported.
+
+Flags
+-----
+
+There are 2 kinds of flag: scoped and global. Scoped flags can apply to only part of a pattern and can be turned on or off; global flags apply to the entire pattern and can only be turned on.
+
+The scoped flags are: ``FULLCASE``, ``IGNORECASE``, ``MULTILINE``, ``DOTALL``, ``VERBOSE``, ``WORD``.
+
+The global flags are: ``ASCII``, ``BESTMATCH``, ``ENHANCEMATCH``, ``LOCALE``, ``POSIX``, ``REVERSE``, ``UNICODE``, ``VERSION0``, ``VERSION1``.
+
+If neither the ``ASCII``, ``LOCALE`` nor ``UNICODE`` flag is specified, it will default to ``UNICODE`` if the regex pattern is a Unicode string and ``ASCII`` if it's a bytestring.
+
+The ``ENHANCEMATCH`` flag makes fuzzy matching attempt to improve the fit of the next match that it finds.
+
+The ``BESTMATCH`` flag makes fuzzy matching search for the best match instead of the next match.
+
+Notes on named capture groups
+-----------------------------
+
+All capture groups have a group number, starting from 1.
+
+Groups with the same group name will have the same group number, and groups with a different group name will have a different group number.
+
+The same name can be used by more than one group, with later captures 'overwriting' earlier captures. All of the captures of the group will be available from the ``captures`` method of the match object.
+
+Group numbers will be reused across different branches of a branch reset, eg. ``(?|(first)|(second))`` has only group 1. If capture groups have different group names then they will, of course, have different group numbers, eg. ``(?|(?P<foo>first)|(?P<bar>second))`` has group 1 ("foo") and group 2 ("bar").
+
+In the regex ``(\s+)(?|(?P<foo>[A-Z]+)|(\w+) (?P<foo>[0-9]+))`` there are 2 groups:
+
+* ``(\s+)`` is group 1.
+
+* ``(?P<foo>[A-Z]+)`` is group 2, also called "foo".
+
+* ``(\w+)`` is group 2 because of the branch reset.
+
+* ``(?P<foo>[0-9]+)`` is group 2 because it's called "foo".
+
+If you want to prevent ``(\w+)`` from being group 2, you need to name it (different name, different group number).
+
+Multithreading
+--------------
+
+The regex module releases the GIL during matching on instances of the built-in (immutable) string classes, enabling other Python threads to run concurrently. It is also possible to force the regex module to release the GIL during matching by calling the matching methods with the keyword argument ``concurrent=True``. The behaviour is undefined if the string changes during matching, so use it *only* when it is guaranteed that that won't happen.
+
+Unicode
+-------
+
+This module supports Unicode 14.0.0.
+
+Full Unicode case-folding is supported.
+
+Additional features
+-------------------
+
+The issue numbers relate to the Python bug tracker, except where listed as "Hg issue".
+
+Added support for lookaround in conditional pattern (`Hg issue 163 <https://github.com/mrabarnett/mrab-regex/issues/163>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The test of a conditional pattern can now be a lookaround.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.match(r'(?(?=\d)\d+|\w+)', '123abc')
+  <regex.Match object; span=(0, 3), match='123'>
+  >>> regex.match(r'(?(?=\d)\d+|\w+)', 'abc123')
+  <regex.Match object; span=(0, 6), match='abc123'>
+
+This is not quite the same as putting a lookaround in the first branch of a pair of alternatives.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> print(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc'))
+  <regex.Match object; span=(0, 6), match='123abc'>
+  >>> print(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'))
+  None
+
+In the first example, the lookaround matched, but the remainder of the first branch failed to match, and so the second branch was attempted, whereas in the second example, the lookaround matched, and the first branch failed to match, but the second branch was **not** attempted.
+
+Added POSIX matching (leftmost longest) (`Hg issue 150 <https://github.com/mrabarnett/mrab-regex/issues/150>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The POSIX standard for regex is to return the leftmost longest match. This can be turned on using the ``POSIX`` flag (``(?p)``).
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> # Normal matching.
+  >>> regex.search(r'Mr|Mrs', 'Mrs')
+  <regex.Match object; span=(0, 2), match='Mr'>
+  >>> regex.search(r'one(self)?(selfsufficient)?', 'oneselfsufficient')
+  <regex.Match object; span=(0, 7), match='oneself'>
+  >>> # POSIX matching.
+  >>> regex.search(r'(?p)Mr|Mrs', 'Mrs')
+  <regex.Match object; span=(0, 3), match='Mrs'>
+  >>> regex.search(r'(?p)one(self)?(selfsufficient)?', 'oneselfsufficient')
+  <regex.Match object; span=(0, 17), match='oneselfsufficient'>
+
+Note that it will take longer to find matches because when it finds a match at a certain position, it won't return that immediately, but will keep looking to see if there's another longer match there.
+
+Added ``(?(DEFINE)...)`` (`Hg issue 152 <https://github.com/mrabarnett/mrab-regex/issues/152>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If there's no group called "DEFINE", then ... will be ignored, but any group definitions within it will be available.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.search(r'(?(DEFINE)(?P<quant>\d+)(?P<item>\w+))(?&quant) (?&item)', '5 elephants')
+  <regex.Match object; span=(0, 11), match='5 elephants'>
+
+Added ``(*PRUNE)``, ``(*SKIP)`` and ``(*FAIL)`` (`Hg issue 153 <https://github.com/mrabarnett/mrab-regex/issues/153>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``(*PRUNE)`` discards the backtracking info up to that point. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
+
+``(*SKIP)`` is similar to ``(*PRUNE)``, except that it also sets where in the text the next attempt to match will start. When used in an atomic group or a lookaround, it won't affect the enclosing pattern.
+
+``(*FAIL)`` causes immediate backtracking. ``(*F)`` is a permitted abbreviation.
+
+Added ``\K`` (`Hg issue 151 <https://github.com/mrabarnett/mrab-regex/issues/151>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Keeps the part of the entire match after the position where ``\K`` occurred; the part before it is discarded.
+
+It does not affect what capture groups return.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.search(r'(\w\w\K\w\w\w)', 'abcdef')
+  >>> m[0]
+  'cde'
+  >>> m[1]
+  'abcde'
+  >>>
+  >>> m = regex.search(r'(?r)(\w\w\K\w\w\w)', 'abcdef')
+  >>> m[0]
+  'bc'
+  >>> m[1]
+  'bcdef'
+
+Added capture subscripting for ``expandf`` and ``subf``/``subfn`` (`Hg issue 133 <https://github.com/mrabarnett/mrab-regex/issues/133>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can now use subscripting to get the captures of a repeated capture group.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.match(r"(\w)+", "abc")
+  >>> m.expandf("{1}")
+  'c'
+  >>> m.expandf("{1[0]} {1[1]} {1[2]}")
+  'a b c'
+  >>> m.expandf("{1[-1]} {1[-2]} {1[-3]}")
+  'c b a'
+  >>>
+  >>> m = regex.match(r"(?P<letter>\w)+", "abc")
+  >>> m.expandf("{letter}")
+  'c'
+  >>> m.expandf("{letter[0]} {letter[1]} {letter[2]}")
+  'a b c'
+  >>> m.expandf("{letter[-1]} {letter[-2]} {letter[-3]}")
+  'c b a'
+
+Added support for referring to a group by number using ``(?P=...)``.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is in addition to the existing ``\g<...>``.
+
+Fixed the handling of locale-sensitive regexes.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``LOCALE`` flag is intended for legacy code and has limited support. You're still recommended to use Unicode instead.
+
+Added partial matches (`Hg issue 102 <https://github.com/mrabarnett/mrab-regex/issues/102>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A partial match is one that matches up to the end of string, but that string has been truncated and you want to know whether a complete match could be possible if the string had not been truncated.
+
+Partial matches are supported by ``match``, ``search``, ``fullmatch`` and ``finditer`` with the ``partial`` keyword argument.
+
+Match objects have a ``partial`` attribute, which is ``True`` if it's a partial match.
+
+For example, if you wanted a user to enter a 4-digit number and check it character by character as it was being entered:
+
+.. sourcecode:: python
+
+  >>> pattern = regex.compile(r'\d{4}')
+
+  >>> # Initially, nothing has been entered:
+  >>> print(pattern.fullmatch('', partial=True))
+  <regex.Match object; span=(0, 0), match='', partial=True>
+
+  >>> # An empty string is OK, but it's only a partial match.
+  >>> # The user enters a letter:
+  >>> print(pattern.fullmatch('a', partial=True))
+  None
+  >>> # It'll never match.
+
+  >>> # The user deletes that and enters a digit:
+  >>> print(pattern.fullmatch('1', partial=True))
+  <regex.Match object; span=(0, 1), match='1', partial=True>
+  >>> # It matches this far, but it's only a partial match.
+
+  >>> # The user enters 2 more digits:
+  >>> print(pattern.fullmatch('123', partial=True))
+  <regex.Match object; span=(0, 3), match='123', partial=True>
+  >>> # It matches this far, but it's only a partial match.
+
+  >>> # The user enters another digit:
+  >>> print(pattern.fullmatch('1234', partial=True))
+  <regex.Match object; span=(0, 4), match='1234'>
+  >>> # It's a complete match.
+
+  >>> # If the user enters another digit:
+  >>> print(pattern.fullmatch('12345', partial=True))
+  None
+  >>> # It's no longer a match.
+
+  >>> # This is a partial match:
+  >>> pattern.match('123', partial=True).partial
+  True
+
+  >>> # This is a complete match:
+  >>> pattern.match('1233', partial=True).partial
+  False
+
+``*`` operator not working correctly with sub() (`Hg issue 106 <https://github.com/mrabarnett/mrab-regex/issues/106>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes it's not clear how zero-width matches should be handled. For example, should ``.*`` match 0 characters directly after matching >0 characters?
+
+Examples:
+
+.. sourcecode:: python
+
+  # Python 3.7 and later
+  >>> regex.sub('.*', 'x', 'test')
+  'xx'
+  >>> regex.sub('.*?', '|', 'test')
+  '|||||||||'
+
+  # Python 3.6 and earlier
+  >>> regex.sub('(?V0).*', 'x', 'test')
+  'x'
+  >>> regex.sub('(?V1).*', 'x', 'test')
+  'xx'
+  >>> regex.sub('(?V0).*?', '|', 'test')
+  '|t|e|s|t|'
+  >>> regex.sub('(?V1).*?', '|', 'test')
+  '|||||||||'
+
+Added ``capturesdict`` (`Hg issue 86 <https://github.com/mrabarnett/mrab-regex/issues/86>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``capturesdict`` is a combination of ``groupdict`` and ``captures``:
+
+``groupdict`` returns a dict of the named groups and the last capture of those groups.
+
+``captures`` returns a list of all the captures of a group.
+
+``capturesdict`` returns a dict of the named groups and lists of all the captures of those groups.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.match(r"(?:(?P<word>\w+) (?P<digits>\d+)\n)+", "one 1\ntwo 2\nthree 3\n")
+  >>> m.groupdict()
+  {'word': 'three', 'digits': '3'}
+  >>> m.captures("word")
+  ['one', 'two', 'three']
+  >>> m.captures("digits")
+  ['1', '2', '3']
+  >>> m.capturesdict()
+  {'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}
+
+Allow duplicate names of groups (`Hg issue 87 <https://github.com/mrabarnett/mrab-regex/issues/87>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Group names can now be duplicated.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> # With optional groups:
+  >>>
+  >>> # Both groups capture, the second capture 'overwriting' the first.
+  >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or second")
+  >>> m.group("item")
+  'second'
+  >>> m.captures("item")
+  ['first', 'second']
+  >>> # Only the second group captures.
+  >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", " or second")
+  >>> m.group("item")
+  'second'
+  >>> m.captures("item")
+  ['second']
+  >>> # Only the first group captures.
+  >>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or ")
+  >>> m.group("item")
+  'first'
+  >>> m.captures("item")
+  ['first']
+  >>>
+  >>> # With mandatory groups:
+  >>>
+  >>> # Both groups capture, the second capture 'overwriting' the first.
+  >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or second")
+  >>> m.group("item")
+  'second'
+  >>> m.captures("item")
+  ['first', 'second']
+  >>> # Again, both groups capture, the second capture 'overwriting' the first.
+  >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", " or second")
+  >>> m.group("item")
+  'second'
+  >>> m.captures("item")
+  ['', 'second']
+  >>> # And yet again, both groups capture, the second capture 'overwriting' the first.
+  >>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or ")
+  >>> m.group("item")
+  ''
+  >>> m.captures("item")
+  ['first', '']
+
+Added ``fullmatch`` (`issue #16203 <https://bugs.python.org/issue16203>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``fullmatch`` behaves like ``match``, except that it must match all of the string.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> print(regex.fullmatch(r"abc", "abc").span())
+  (0, 3)
+  >>> print(regex.fullmatch(r"abc", "abcx"))
+  None
+  >>> print(regex.fullmatch(r"abc", "abcx", endpos=3).span())
+  (0, 3)
+  >>> print(regex.fullmatch(r"abc", "xabcy", pos=1, endpos=4).span())
+  (1, 4)
+  >>>
+  >>> regex.match(r"a.*?", "abcd").group(0)
+  'a'
+  >>> regex.fullmatch(r"a.*?", "abcd").group(0)
+  'abcd'
+
+Added ``subf`` and ``subfn``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``subf`` and ``subfn`` are alternatives to ``sub`` and ``subn`` respectively. When passed a replacement string, they treat it as a format string.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", "foo bar")
+  'foo bar => bar foo'
+  >>> regex.subf(r"(?P<word1>\w+) (?P<word2>\w+)", "{word2} {word1}", "foo bar")
+  'bar foo'
+
+Added ``expandf`` to match object
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``expandf`` is an alternative to ``expand``. When passed a replacement string, it treats it as a format string.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.match(r"(\w+) (\w+)", "foo bar")
+  >>> m.expandf("{0} => {2} {1}")
+  'foo bar => bar foo'
+  >>>
+  >>> m = regex.match(r"(?P<word1>\w+) (?P<word2>\w+)", "foo bar")
+  >>> m.expandf("{word2} {word1}")
+  'bar foo'
+
+Detach searched string
+^^^^^^^^^^^^^^^^^^^^^^
+
+A match object contains a reference to the string that was searched, via its ``string`` attribute. The ``detach_string`` method will 'detach' that string, making it available for garbage collection, which might save valuable memory if that string is very large.
+
+Example:
+
+.. sourcecode:: python
+
+  >>> m = regex.search(r"\w+", "Hello world")
+  >>> print(m.group())
+  Hello
+  >>> print(m.string)
+  Hello world
+  >>> m.detach_string()
+  >>> print(m.group())
+  Hello
+  >>> print(m.string)
+  None
+
+Recursive patterns (`Hg issue 27 <https://github.com/mrabarnett/mrab-regex/issues/27>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Recursive and repeated patterns are supported.
+
+``(?R)`` or ``(?0)`` tries to match the entire regex recursively. ``(?1)``, ``(?2)``, etc, try to match the relevant capture group.
+
+``(?&name)`` tries to match the named capture group.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.match(r"(Tarzan|Jane) loves (?1)", "Tarzan loves Jane").groups()
+  ('Tarzan',)
+  >>> regex.match(r"(Tarzan|Jane) loves (?1)", "Jane loves Tarzan").groups()
+  ('Jane',)
+
+  >>> m = regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")
+  >>> m.group(0, 1, 2)
+  ('kayak', 'k', None)
+
+The first two examples show how the subpattern within the capture group is reused, but is *not* itself a capture group. In other words, ``"(Tarzan|Jane) loves (?1)"`` is equivalent to ``"(Tarzan|Jane) loves (?:Tarzan|Jane)"``.
+
+It's possible to backtrack into a recursed or repeated group.
+
+You can't call a group if there is more than one group with that group name or group number (``"ambiguous group reference"``).
+
+The alternative forms ``(?P>name)`` and ``(?P&name)`` are also supported.
+
+Full Unicode case-folding is supported.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In version 1 behaviour, the regex module uses full case-folding when performing case-insensitive matches in Unicode.
+
+Examples (in Python 3):
+
+.. sourcecode:: python
+
+  >>> regex.match(r"(?iV1)strasse", "stra\N{LATIN SMALL LETTER SHARP S}e").span()
+  (0, 6)
+  >>> regex.match(r"(?iV1)stra\N{LATIN SMALL LETTER SHARP S}e", "STRASSE").span()
+  (0, 7)
+
+In version 0 behaviour, it uses simple case-folding for backward compatibility with the re module.
+
+Approximate "fuzzy" matching (`Hg issue 12 <https://github.com/mrabarnett/mrab-regex/issues/12>`_, `Hg issue 41 <https://github.com/mrabarnett/mrab-regex/issues/41>`_, `Hg issue 109 <https://github.com/mrabarnett/mrab-regex/issues/109>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Regex usually attempts an exact match, but sometimes an approximate, or "fuzzy", match is needed, for those cases where the text being searched may contain errors in the form of inserted, deleted or substituted characters.
+
+A fuzzy regex specifies which types of errors are permitted, and, optionally, either the minimum and maximum or only the maximum permitted number of each type. (You cannot specify only a minimum.)
+
+The 3 types of error are:
+
+* Insertion, indicated by "i"
+
+* Deletion, indicated by "d"
+
+* Substitution, indicated by "s"
+
+In addition, "e" indicates any type of error.
+
+The fuzziness of a regex item is specified between "{" and "}" after the item.
+
+Examples:
+
+* ``foo`` match "foo" exactly
+
+* ``(?:foo){i}`` match "foo", permitting insertions
+
+* ``(?:foo){d}`` match "foo", permitting deletions
+
+* ``(?:foo){s}`` match "foo", permitting substitutions
+
+* ``(?:foo){i,s}`` match "foo", permitting insertions and substitutions
+
+* ``(?:foo){e}`` match "foo", permitting errors
+
+If a certain type of error is specified, then any type not specified will **not** be permitted.
+
+In the following examples I'll omit the item and write only the fuzziness:
+
+* ``{d<=3}`` permit at most 3 deletions, but no other types
+
+* ``{i<=1,s<=2}`` permit at most 1 insertion and at most 2 substitutions, but no deletions
+
+* ``{1<=e<=3}`` permit at least 1 and at most 3 errors
+
+* ``{i<=2,d<=2,e<=3}`` permit at most 2 insertions, at most 2 deletions, at most 3 errors in total, but no substitutions
+
+It's also possible to state the costs of each type of error and the maximum permitted total cost.
+
+Examples:
+
+* ``{2i+2d+1s<=4}`` each insertion costs 2, each deletion costs 2, each substitution costs 1, the total cost must not exceed 4
+
+* ``{i<=1,d<=1,s<=1,2i+2d+1s<=4}`` at most 1 insertion, at most 1 deletion, at most 1 substitution; each insertion costs 2, each deletion costs 2, each substitution costs 1, the total cost must not exceed 4
+
+You can also use "<" instead of "<=" if you want an exclusive minimum or maximum.
+
+You can add a test to perform on a character that's substituted or inserted.
+
+Examples:
+
+* ``{s<=2:[a-z]}`` at most 2 substitutions, which must be in the character set ``[a-z]``.
+
+* ``{s<=2,i<=3:\d}`` at most 2 substitutions, at most 3 insertions, which must be digits.
+
+By default, fuzzy matching searches for the first match that meets the given constraints. The ``ENHANCEMATCH`` flag will cause it to attempt to improve the fit (i.e. reduce the number of errors) of the match that it has found.
+
+The ``BESTMATCH`` flag will make it search for the best match instead.
+
+Further examples to note:
+
+* ``regex.search("(dog){e}", "cat and dog")[1]`` returns ``"cat"`` because that matches ``"dog"`` with 3 errors (an unlimited number of errors is permitted).
+
+* ``regex.search("(dog){e<=1}", "cat and dog")[1]`` returns ``" dog"`` (with a leading space) because that matches ``"dog"`` with 1 error, which is within the limit.
+
+* ``regex.search("(?e)(dog){e<=1}", "cat and dog")[1]`` returns ``"dog"`` (without a leading space) because the fuzzy search matches ``" dog"`` with 1 error, which is within the limit, and the ``(?e)`` then makes it attempt a better fit.
+
+In the first two examples there are perfect matches later in the string, but in neither case is it the first possible match.
+
+The match object has an attribute ``fuzzy_counts`` which gives the total number of substitutions, insertions and deletions.
+
+.. sourcecode:: python
+
+  >>> # A 'raw' fuzzy match:
+  >>> regex.fullmatch(r"(?:cats|cat){e<=1}", "cat").fuzzy_counts
+  (0, 0, 1)
+  >>> # 0 substitutions, 0 insertions, 1 deletion.
+
+  >>> # A better match might be possible if the ENHANCEMATCH flag were used:
+  >>> regex.fullmatch(r"(?e)(?:cats|cat){e<=1}", "cat").fuzzy_counts
+  (0, 0, 0)
+  >>> # 0 substitutions, 0 insertions, 0 deletions.
+
+The match object also has an attribute ``fuzzy_changes`` which gives a tuple of the positions of the substitutions, insertions and deletions.
+
+.. sourcecode:: python
+
+  >>> m = regex.search('(fuu){i<=2,d<=2,e<=5}', 'anaconda foo bar')
+  >>> m
+  <regex.Match object; span=(7, 10), match='a f', fuzzy_counts=(0, 2, 2)>
+  >>> m.fuzzy_changes
+  ([], [7, 8], [10, 11])
+
+What this means is that if the matched part of the string had been:
+
+.. sourcecode:: python
+
+  'anacondfuuoo bar'
+
+it would've been an exact match.
+
+However, there were insertions at positions 7 and 8:
+
+.. sourcecode:: python
+
+  'anaconda fuuoo bar'
+          ^^
+
+and deletions at positions 10 and 11:
+
+.. sourcecode:: python
+
+  'anaconda f~~oo bar'
+             ^^
+
+So the actual string was:
+
+.. sourcecode:: python
+
+  'anaconda foo bar'
+
+Named lists (`Hg issue 11 <https://github.com/mrabarnett/mrab-regex/issues/11>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``\L<name>``
+
+There are occasions where you may want to include a list (actually, a set) of options in a regex.
+
+One way is to build the pattern like this:
+
+.. sourcecode:: python
+
+  >>> p = regex.compile(r"first|second|third|fourth|fifth")
+
+but if the list is large, parsing the resulting regex can take considerable time, and care must also be taken that the strings are properly escaped and properly ordered, for example, "cats" before "cat".
+
+The new alternative is to use a named list:
+
+.. sourcecode:: python
+
+  >>> option_set = ["first", "second", "third", "fourth", "fifth"]
+  >>> p = regex.compile(r"\L<options>", options=option_set)
+
+The order of the items is irrelevant, they are treated as a set. The named lists are available as the ``.named_lists`` attribute of the pattern object:
+
+.. sourcecode:: python
+
+  >>> print(p.named_lists)
+  # Python 3
+  {'options': frozenset({'fifth', 'first', 'fourth', 'second', 'third'})}
+  # Python 2
+  {'options': frozenset(['fifth', 'fourth', 'second', 'third', 'first'])}
+
+If there are any unused keyword arguments, ``ValueError`` will be raised unless you tell it otherwise:
+
+.. sourcecode:: python
+
+  >>> option_set = ["first", "second", "third", "fourth", "fifth"]
+  >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[])
+  Traceback (most recent call last):
+    File "<stdin>", line 1, in <module>
+    File "C:\Python37\lib\site-packages\regex\regex.py", line 348, in compile
+      return _compile(pattern, flags, ignore_unused, kwargs)
+    File "C:\Python37\lib\site-packages\regex\regex.py", line 585, in _compile
+      raise ValueError('unused keyword argument {!a}'.format(any_one))
+  ValueError: unused keyword argument 'other_options'
+  >>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=True)
+  >>>
+
+Start and end of word
+^^^^^^^^^^^^^^^^^^^^^
+
+``\m`` matches at the start of a word.
+
+``\M`` matches at the end of a word.
+
+Compare with ``\b``, which matches at the start or end of a word.
+
+Unicode line separators
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Normally the only line separator is ``\n`` (``\x0A``), but if the ``WORD`` flag is turned on then the line separators are ``\x0D\x0A``, ``\x0A``, ``\x0B``, ``\x0C`` and ``\x0D``, plus ``\x85``, ``\u2028`` and ``\u2029`` when working with Unicode.
+
+This affects the regex dot ``"."``, which, with the ``DOTALL`` flag turned off, matches any character except a line separator. It also affects the line anchors ``^`` and ``$`` (in multiline mode).
+
+Set operators
+^^^^^^^^^^^^^
+
+**Version 1 behaviour only**
+
+Set operators have been added, and a set ``[...]`` can include nested sets.
+
+The operators, in order of increasing precedence, are:
+
+* ``||`` for union ("x||y" means "x or y")
+
+* ``~~`` (double tilde) for symmetric difference ("x~~y" means "x or y, but not both")
+
+* ``&&`` for intersection ("x&&y" means "x and y")
+
+* ``--`` (double dash) for difference ("x--y" means "x but not y")
+
+Implicit union, i.e., simple juxtaposition like in ``[ab]``, has the highest precedence. Thus, ``[ab&&cd]`` is the same as ``[[a||b]&&[c||d]]``.
+
+Examples:
+
+* ``[ab]`` # Set containing 'a' and 'b'
+
+* ``[a-z]`` # Set containing 'a' .. 'z'
+
+* ``[[a-z]--[qw]]`` # Set containing 'a' .. 'z', but not 'q' or 'w'
+
+* ``[a-z--qw]`` # Same as above
+
+* ``[\p{L}--QW]`` # Set containing all letters except 'Q' and 'W'
+
+* ``[\p{N}--[0-9]]`` # Set containing all numbers except '0' .. '9'
+
+* ``[\p{ASCII}&&\p{Letter}]`` # Set containing all characters which are ASCII and letter
+
+regex.escape (`issue #2650 <https://bugs.python.org/issue2650>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+regex.escape has an additional keyword parameter ``special_only``. When True, only 'special' regex characters, such as '?', are escaped.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.escape("foo!?", special_only=False)
+  'foo\\!\\?'
+  >>> regex.escape("foo!?", special_only=True)
+  'foo!\\?'
+
+regex.escape (`Hg issue 249 <https://github.com/mrabarnett/mrab-regex/issues/249>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+regex.escape has an additional keyword parameter ``literal_spaces``. When True, spaces are not escaped.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.escape("foo bar!?", literal_spaces=False)
+  'foo\\ bar!\\?'
+  >>> regex.escape("foo bar!?", literal_spaces=True)
+  'foo bar!\\?'
+
+Repeated captures (`issue #7132 <https://bugs.python.org/issue7132>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A match object has additional methods which return information on all the successful matches of a repeated capture group. These methods are:
+
+* ``matchobject.captures([group1, ...])``
+
+  * Returns a list of the strings matched in a group or groups. Compare with ``matchobject.group([group1, ...])``.
+
+* ``matchobject.starts([group])``
+
+  * Returns a list of the start positions. Compare with ``matchobject.start([group])``.
+
+* ``matchobject.ends([group])``
+
+  * Returns a list of the end positions. Compare with ``matchobject.end([group])``.
+
+* ``matchobject.spans([group])``
+
+  * Returns a list of the spans. Compare with ``matchobject.span([group])``.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> m = regex.search(r"(\w{3})+", "123456789")
+  >>> m.group(1)
+  '789'
+  >>> m.captures(1)
+  ['123', '456', '789']
+  >>> m.start(1)
+  6
+  >>> m.starts(1)
+  [0, 3, 6]
+  >>> m.end(1)
+  9
+  >>> m.ends(1)
+  [3, 6, 9]
+  >>> m.span(1)
+  (6, 9)
+  >>> m.spans(1)
+  [(0, 3), (3, 6), (6, 9)]
+
+Atomic grouping (`issue #433030 <https://bugs.python.org/issue433030>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``(?>...)``
+
+If the following pattern subsequently fails, then the subpattern as a whole will fail.
+
+Possessive quantifiers.
+^^^^^^^^^^^^^^^^^^^^^^^
+
+``(?:...)?+`` ; ``(?:...)*+`` ; ``(?:...)++`` ; ``(?:...){min,max}+``
+
+The subpattern is matched up to 'max' times. If the following pattern subsequently fails, then all of the repeated subpatterns will fail as a whole. For example, ``(?:...)++`` is equivalent to ``(?>(?:...)+)``.
+
+Scoped flags (`issue #433028 <https://bugs.python.org/issue433028>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``(?flags-flags:...)``
+
+The flags will apply only to the subpattern. Flags can be turned on or off.
+
+Definition of 'word' character (`issue #1693050 <https://bugs.python.org/issue1693050>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The definition of a 'word' character has been expanded for Unicode. It now conforms to the Unicode specification at ``http://www.unicode.org/reports/tr29/``.
+
+Variable-length lookbehind
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A lookbehind can match a variable-length string.
+
+Flags argument for regex.split, regex.sub and regex.subn (`issue #3482 <https://bugs.python.org/issue3482>`_)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``regex.split``, ``regex.sub`` and ``regex.subn`` support a 'flags' argument.
+
+Pos and endpos arguments for regex.sub and regex.subn
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``regex.sub`` and ``regex.subn`` support 'pos' and 'endpos' arguments.
+
+'Overlapped' argument for regex.findall and regex.finditer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``regex.findall`` and ``regex.finditer`` support an 'overlapped' flag which permits overlapped matches.
+
+Splititer
+^^^^^^^^^
+
+``regex.splititer`` has been added. It's a generator equivalent of ``regex.split``.
+
+Subscripting for groups
+^^^^^^^^^^^^^^^^^^^^^^^
+
+A match object accepts access to the captured groups via subscripting and slicing:
+
+.. sourcecode:: python
+
+  >>> m = regex.search(r"(?P<before>.*?)(?P<num>\d+)(?P<after>.*)", "pqr123stu")
+  >>> print(m["before"])
+  pqr
+  >>> print(len(m))
+  4
+  >>> print(m[:])
+  ('pqr123stu', 'pqr', '123', 'stu')
+
+Named groups
+^^^^^^^^^^^^
+
+Groups can be named with ``(?<name>...)`` as well as the current ``(?P<name>...)``.
+
+Group references
+^^^^^^^^^^^^^^^^
+
+Groups can be referenced within a pattern with ``\g<name>``. This also allows there to be more than 99 groups.
+
+Named characters
+^^^^^^^^^^^^^^^^
+
+``\N{name}``
+
+Named characters are supported. (Note: only those known by Python's Unicode database are supported.)
+
+Unicode codepoint properties, including scripts and blocks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``\p{property=value}``; ``\P{property=value}``; ``\p{value}`` ; ``\P{value}``
+
+Many Unicode properties are supported, including blocks and scripts. ``\p{property=value}`` or ``\p{property:value}`` matches a character whose property ``property`` has value ``value``. The inverse of ``\p{property=value}`` is ``\P{property=value}`` or ``\p{^property=value}``.
+
+If the short form ``\p{value}`` is used, the properties are checked in the order: ``General_Category``, ``Script``, ``Block``, binary property:
+
+* ``Latin``, the 'Latin' script (``Script=Latin``).
+
+* ``BasicLatin``, the 'BasicLatin' block (``Block=BasicLatin``).
+
+* ``Alphabetic``, the 'Alphabetic' binary property (``Alphabetic=Yes``).
+
+A short form starting with ``Is`` indicates a script or binary property:
+
+* ``IsLatin``, the 'Latin' script (``Script=Latin``).
+
+* ``IsAlphabetic``, the 'Alphabetic' binary property (``Alphabetic=Yes``).
+
+A short form starting with ``In`` indicates a block property:
+
+* ``InBasicLatin``, the 'BasicLatin' block (``Block=BasicLatin``).
+
+POSIX character classes
+^^^^^^^^^^^^^^^^^^^^^^^
+
+``[[:alpha:]]``; ``[[:^alpha:]]``
+
+POSIX character classes are supported. These are normally treated as an alternative form of ``\p{...}``.
+
+The exceptions are ``alnum``, ``digit``, ``punct`` and ``xdigit``, whose definitions are different from those of Unicode.
+
+``[[:alnum:]]`` is equivalent to ``\p{posix_alnum}``.
+
+``[[:digit:]]`` is equivalent to ``\p{posix_digit}``.
+
+``[[:punct:]]`` is equivalent to ``\p{posix_punct}``.
+
+``[[:xdigit:]]`` is equivalent to ``\p{posix_xdigit}``.
+
+Search anchor
+^^^^^^^^^^^^^
+
+``\G``
+
+A search anchor has been added. It matches at the position where each search started/continued and can be used for contiguous matches or in negative variable-length lookbehinds to limit how far back the lookbehind goes:
+
+.. sourcecode:: python
+
+  >>> regex.findall(r"\w{2}", "abcd ef")
+  ['ab', 'cd', 'ef']
+  >>> regex.findall(r"\G\w{2}", "abcd ef")
+  ['ab', 'cd']
+
+* The search starts at position 0 and matches 2 letters 'ab'.
+
+* The search continues at position 2 and matches 2 letters 'cd'.
+
+* The search continues at position 4 and fails to match any letters.
+
+* The anchor stops the search start position from being advanced, so there are no more results.
+
+Reverse searching
+^^^^^^^^^^^^^^^^^
+
+Searches can now work backwards:
+
+.. sourcecode:: python
+
+  >>> regex.findall(r".", "abc")
+  ['a', 'b', 'c']
+  >>> regex.findall(r"(?r).", "abc")
+  ['c', 'b', 'a']
+
+Note: the result of a reverse search is not necessarily the reverse of a forward search:
+
+.. sourcecode:: python
+
+  >>> regex.findall(r"..", "abcde")
+  ['ab', 'cd']
+  >>> regex.findall(r"(?r)..", "abcde")
+  ['de', 'bc']
+
+Matching a single grapheme
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``\X``
+
+The grapheme matcher is supported. It now conforms to the Unicode specification at ``http://www.unicode.org/reports/tr29/``.
+
+Branch reset
+^^^^^^^^^^^^
+
+``(?|...|...)``
+
+Capture group numbers will be reused across the alternatives, but groups with different names will have different group numbers.
+
+Examples:
+
+.. sourcecode:: python
+
+  >>> regex.match(r"(?|(first)|(second))", "first").groups()
+  ('first',)
+  >>> regex.match(r"(?|(first)|(second))", "second").groups()
+  ('second',)
+
+Note that there is only one group.
+
+Default Unicode word boundary
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``WORD`` flag changes the definition of a 'word boundary' to that of a default Unicode word boundary. This applies to ``\b`` and ``\B``.
+
+Timeout (Python 3)
+^^^^^^^^^^^^^^^^^^
+
+The matching methods and functions support timeouts. The timeout (in seconds) applies to the entire operation:
+
+.. sourcecode:: python
+
+  >>> from time import sleep
+  >>>
+  >>> def fast_replace(m):
+  ...     return 'X'
+  ...
+  >>> def slow_replace(m):
+  ...     sleep(0.5)
+  ...     return 'X'
+  ...
+  >>> regex.sub(r'[a-z]', fast_replace, 'abcde', timeout=2)
+  'XXXXX'
+  >>> regex.sub(r'[a-z]', slow_replace, 'abcde', timeout=2)
+  Traceback (most recent call last):
+    File "<stdin>", line 1, in <module>
+    File "C:\Python37\lib\site-packages\regex\regex.py", line 276, in sub
+      endpos, concurrent, timeout)
+  TimeoutError: regex timed out
+
+
diff --git a/regex_2/_regex.c b/regex_2/_regex.c
index 126dac8e330be252205be117a3483807620741c0..afc203464632e6f0ca3a761c8bd17ffbf1810de5 100644
--- a/regex_2/_regex.c
+++ b/regex_2/_regex.c
@@ -9593,7 +9593,7 @@ Py_LOCAL_INLINE(int) check_fuzzy_partial(RE_State* state, Py_ssize_t text_pos)
     return RE_ERROR_FAILURE;
 }
 
-/* Records a change in a fuzzy change. */
+/* Records a fuzzy change. */
 Py_LOCAL_INLINE(BOOL) record_fuzzy(RE_State* state, RE_UINT8 fuzzy_type,
   Py_ssize_t text_pos) {
     RE_FuzzyChangesList* change_list;
@@ -10219,7 +10219,7 @@ Py_LOCAL_INLINE(int) retry_fuzzy_insert(RE_State* state, RE_Node** node) {
 
     if (state->text_pos == limit || !insertion_permitted(state,
       state->fuzzy_node, state->fuzzy_counts) || !fuzzy_ext_match(state,
-      curr_node->nonstring.next_2.node, state->text_pos)) {
+      state->fuzzy_node, state->text_pos)) {
         while (count > 0) {
             unrecord_fuzzy(state);
             --state->fuzzy_counts[RE_FUZZY_INS];
@@ -15279,9 +15279,9 @@ backtrack:
              * bstack: -
              */
 
-            if (insertion_permitted(state, inner_node, inner_counts) && 
-              total_errors(state->fuzzy_counts) + total_errors(inner_counts) < 
-              state->max_errors && fuzzy_ext_match(state, inner_node, 
+            if (insertion_permitted(state, inner_node, inner_counts) &&
+              total_errors(state->fuzzy_counts) + total_errors(inner_counts) <
+              state->max_errors && fuzzy_ext_match(state, inner_node,
               state->text_pos)) {
                 RE_INT8 step;
                 Py_ssize_t limit;
@@ -15348,6 +15348,10 @@ backtrack:
              */
 
             inner_counts[RE_FUZZY_INS] -= insertions;
+            while (insertions > 0) {
+                unrecord_fuzzy(state);
+                --insertions;
+            }
 
             /* Restore the inner fuzzy info. */
             Py_MEMCPY(state->fuzzy_counts, inner_counts,
@@ -21059,11 +21063,17 @@ static void capture_dealloc(PyObject* self_) {
 static PyObject* capture_str(PyObject* self_) {
     CaptureObject* self;
     MatchObject* match;
+    PyObject* default_value;
+    PyObject* result;
 
     self = (CaptureObject*)self_;
     match = *self->match_indirect;
 
-    return match_get_group_by_index(match, self->group_index, Py_None);
+    default_value = PySequence_GetSlice(match->string, 0, 0);
+    result = match_get_group_by_index(match, self->group_index, default_value);
+    Py_DECREF(default_value);
+
+    return result;
 }
 
 static PyMemberDef splitter_members[] = {
@@ -24551,171 +24561,6 @@ Py_LOCAL_INLINE(int) build_REF_GROUP(RE_CompileArgs* args) {
     return RE_ERROR_SUCCESS;
 }
 
-Py_LOCAL_INLINE(BOOL) section_contains_repeat(RE_CODE** code, RE_CODE*
-  end_code);
-
-/* Checks whether a subsection of code contains a repeat. */
-Py_LOCAL_INLINE(BOOL) subsection_contains_repeat(int initial, RE_CODE** code,
-  RE_CODE* end_code) {
-    /* codes: code*initial, ..., (next, ..., )* end. */
-    *code += initial;
-
-    if (*code >= end_code)
-        return FALSE;
-
-    if (section_contains_repeat(code, end_code))
-        return TRUE;
-
-    while (*code < end_code && **code == RE_OP_NEXT) {
-        ++(*code);
-
-        if (*code >= end_code)
-            return FALSE;
-
-        if (section_contains_repeat(code, end_code))
-            return TRUE;
-    }
-
-    if (*code >= end_code)
-        return FALSE;
-
-    ++(*code);
-
-    return FALSE;
-}
-
-/* Checks whether a section of code contains a repeat. */
-Py_LOCAL_INLINE(BOOL) section_contains_repeat(RE_CODE** code, RE_CODE*
-  end_code) {
-    while (*code < end_code) {
-        /* The following code groups opcodes by format, not function. */
-        switch ((*code)[0]) {
-        case RE_OP_ANY:
-        case RE_OP_ANY_ALL:
-        case RE_OP_ANY_ALL_REV:
-        case RE_OP_ANY_REV:
-        case RE_OP_ANY_U:
-        case RE_OP_ANY_U_REV:
-        case RE_OP_FAILURE:
-        case RE_OP_PRUNE:
-        case RE_OP_SUCCESS:
-            /* codes: opcode. */
-            ++(*code);
-            break;
-        case RE_OP_ATOMIC:
-        case RE_OP_BRANCH:
-        case RE_OP_GROUP_EXISTS:
-            /* codes: opcode, ..., (next, ..., )* end. */
-            if (subsection_contains_repeat(1, code, end_code))
-                return TRUE;
-            break;
-        case RE_OP_BOUNDARY:
-        case RE_OP_CALL_REF:
-        case RE_OP_DEFAULT_BOUNDARY:
-        case RE_OP_DEFAULT_END_OF_WORD:
-        case RE_OP_DEFAULT_START_OF_WORD:
-        case RE_OP_END_OF_LINE:
-        case RE_OP_END_OF_LINE_U:
-        case RE_OP_END_OF_STRING:
-        case RE_OP_END_OF_STRING_LINE:
-        case RE_OP_END_OF_STRING_LINE_U:
-        case RE_OP_END_OF_WORD:
-        case RE_OP_GRAPHEME_BOUNDARY:
-        case RE_OP_GROUP_CALL:
-        case RE_OP_KEEP:
-        case RE_OP_SEARCH_ANCHOR:
-        case RE_OP_SKIP:
-        case RE_OP_START_OF_LINE:
-        case RE_OP_START_OF_LINE_U:
-        case RE_OP_START_OF_STRING:
-        case RE_OP_START_OF_WORD:
-            /* codes: opcode, value. */
-            *code += 2;
-            break;
-        case RE_OP_CHARACTER:
-        case RE_OP_CHARACTER_IGN:
-        case RE_OP_CHARACTER_IGN_REV:
-        case RE_OP_CHARACTER_REV:
-        case RE_OP_PROPERTY:
-        case RE_OP_PROPERTY_IGN:
-        case RE_OP_PROPERTY_IGN_REV:
-        case RE_OP_PROPERTY_REV:
-        case RE_OP_REF_GROUP:
-        case RE_OP_REF_GROUP_FLD:
-        case RE_OP_REF_GROUP_FLD_REV:
-        case RE_OP_REF_GROUP_IGN:
-        case RE_OP_REF_GROUP_IGN_REV:
-        case RE_OP_REF_GROUP_REV:
-            /* codes: opcode, value, value. */
-            *code += 3;
-            break;
-        case RE_OP_CONDITIONAL:
-        case RE_OP_FUZZY:
-        case RE_OP_LOOKAROUND:
-            /* codes: opcode, value, value, ..., (next, ..., )* end. */
-            if (subsection_contains_repeat(3, code, end_code))
-                return TRUE;
-            break;
-        case RE_OP_GREEDY_REPEAT:
-        case RE_OP_LAZY_REPEAT:
-            return TRUE;
-        case RE_OP_GROUP:
-            /* codes: opcode, value, value, value, ..., (next, ..., )* end. */
-            if (subsection_contains_repeat(4, code, end_code))
-                return TRUE;
-            break;
-        case RE_OP_RANGE:
-        case RE_OP_RANGE_IGN:
-        case RE_OP_RANGE_IGN_REV:
-        case RE_OP_RANGE_REV:
-            /* codes: opcode, value, value, value. */
-            *code += 4;
-            break;
-        case RE_OP_SET_DIFF:
-        case RE_OP_SET_DIFF_IGN:
-        case RE_OP_SET_DIFF_IGN_REV:
-        case RE_OP_SET_DIFF_REV:
-        case RE_OP_SET_INTER:
-        case RE_OP_SET_INTER_IGN:
-        case RE_OP_SET_INTER_IGN_REV:
-        case RE_OP_SET_INTER_REV:
-        case RE_OP_SET_SYM_DIFF:
-        case RE_OP_SET_SYM_DIFF_IGN:
-        case RE_OP_SET_SYM_DIFF_IGN_REV:
-        case RE_OP_SET_SYM_DIFF_REV:
-        case RE_OP_SET_UNION:
-        case RE_OP_SET_UNION_IGN:
-        case RE_OP_SET_UNION_IGN_REV:
-        case RE_OP_SET_UNION_REV:
-            /* codes: opcode, value, ..., (next, ..., )* end. */
-            if (subsection_contains_repeat(2, code, end_code))
-                return TRUE;
-            break;
-        case RE_OP_STRING:
-        case RE_OP_STRING_FLD:
-        case RE_OP_STRING_FLD_REV:
-        case RE_OP_STRING_IGN:
-        case RE_OP_STRING_IGN_REV:
-        case RE_OP_STRING_REV:
-            /* codes: opcode, value, length, .... */
-            *code += 3 + (*code)[2];
-            break;
-        default:
-            /* We've found an opcode which we don't recognise. We'll leave it
-             * for the caller.
-             */
-            return FALSE;
-        }
-    }
-
-    return FALSE;
-}
-
-/* Checks whether a section of code contains a repeat. */
-Py_LOCAL_INLINE(BOOL) contains_repeat(RE_CODE* code, RE_CODE* end_code) {
-    return section_contains_repeat(&code, end_code);
-}
-
 /* Builds a REPEAT node. */
 Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) {
     BOOL greedy;
diff --git a/regex_2/_regex_core.py b/regex_2/_regex_core.py
index 0b5a2ddee43b82a5437a6703a2dfeecc1684fea3..5953ec617909d5592812faa1ccde22bd2e055b29 100644
--- a/regex_2/_regex_core.py
+++ b/regex_2/_regex_core.py
@@ -452,7 +452,7 @@ def parse_sequence(source, info):
                     sequence.append(None)
                 else:
                     # It's not a quantifier. Maybe it's a fuzzy constraint.
-                    constraints = parse_fuzzy(source, info, ch)
+                    constraints = parse_fuzzy(source, info, ch, case_flags)
                     if constraints:
                         # It _is_ a fuzzy constraint.
                         apply_constraint(source, info, constraints, case_flags,
@@ -569,7 +569,7 @@ def parse_limited_quantifier(source):
 
     return min_count, max_count
 
-def parse_fuzzy(source, info, ch):
+def parse_fuzzy(source, info, ch, case_flags):
     "Parses a fuzzy setting, if present."
     saved_pos = source.pos
 
@@ -586,7 +586,7 @@ def parse_fuzzy(source, info, ch):
         return None
 
     if source.match(":"):
-        constraints["test"] = parse_fuzzy_test(source, info)
+        constraints["test"] = parse_fuzzy_test(source, info, case_flags)
 
     if not source.match("}"):
         raise error("expected }", source.string, source.pos)
@@ -734,7 +734,7 @@ def parse_cost_term(source, cost):
 
     cost[ch] = int(coeff or 1)
 
-def parse_fuzzy_test(source, info):
+def parse_fuzzy_test(source, info, case_flags):
     saved_pos = source.pos
     ch = source.get()
     if ch in SPECIAL_CHARS:
diff --git a/regex_2/regex.py b/regex_2/regex.py
index f27ed183319c246b9ef13feed060bf4b9bf83038..3e2f5503146901a798581d2c6f76b8be9e37f137 100644
--- a/regex_2/regex.py
+++ b/regex_2/regex.py
@@ -241,7 +241,7 @@ __all__ = ["cache_all", "compile", "DEFAULT_VERSION", "escape", "findall",
   "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
   "__doc__"]
 
-__version__ = "2.5.103"
+__version__ = "2.5.109"
 
 # --------------------------------------------------------------------
 # Public interface.
diff --git a/regex_2/test_regex.py b/regex_2/test_regex.py
index dc0f0ff0fcd9f3efe8a49bcfdfa0665bef607dba..3afa41d25a3c31d645c3e51b709cca560c00e264 100644
--- a/regex_2/test_regex.py
+++ b/regex_2/test_regex.py
@@ -4114,6 +4114,41 @@ thing
         self.assertEqual(p.search('10 months 1 hour ago').group(), '1 hour ago')
         self.assertEqual(p.search('1 month 10 hours ago').group(), '10 hours ago')
 
+        # Git issue 427: Possible bug with BESTMATCH
+        sequence = 'TTCAGACGTGTGCTCTTCCGATCTCAATACCGACTCCTCACTGTGTGTCT'
+        pattern = r'(?P<insert>.*)(?P<anchor>CTTCC){e<=1}(?P<umi>([ACGT]){4,6})(?P<sid>CAATACCGACTCCTCACTGTGT){e<=2}(?P<end>([ACGT]){0,6}$)'
+
+        m = regex.match(pattern, sequence, flags=regex.BESTMATCH)
+        self.assertEqual(m.span(), (0, 50))
+        self.assertEqual(m.groupdict(), {'insert': 'TTCAGACGTGTGCT', 'anchor': 'CTTCC', 'umi': 'GATCT', 'sid': 'CAATACCGACTCCTCACTGTGT', 'end': 'GTCT'})
+
+        m = regex.match(pattern, sequence, flags=regex.ENHANCEMATCH)
+        self.assertEqual(m.span(), (0, 50))
+        self.assertEqual(m.groupdict(), {'insert': 'TTCAGACGTGTGCT', 'anchor': 'CTTCC', 'umi': 'GATCT', 'sid': 'CAATACCGACTCCTCACTGTGT', 'end': 'GTCT'})
+
+        # Git issue 433: Disagreement between fuzzy_counts and fuzzy_changes
+        pattern = r'(?P<insert>.*)(?P<anchor>AACACTGG){e<=1}(?P<umi>([AT][CG]){5}){e<=2}(?P<sid>GTAACCGAAG){e<=2}(?P<end>([ACGT]){0,6}$)'
+
+        sequence = 'GGAAAACACTGGTCTCAGTCTCGTAACCGAAGTGGTCG'
+        m = regex.match(pattern, sequence, flags=regex.BESTMATCH)
+        self.assertEqual(m.fuzzy_counts, (0, 0, 0))
+        self.assertEqual(m.fuzzy_changes, ([], [], []))
+
+        sequence = 'GGAAAACACTGGTCTCAGTCTCGTCCCCGAAGTGGTCG'
+        m = regex.match(pattern, sequence, flags=regex.BESTMATCH)
+        self.assertEqual(m.fuzzy_counts, (2, 0, 0))
+        self.assertEqual(m.fuzzy_changes, ([24, 25], [], []))
+
+        # Git issue 439: Unmatched groups: sub vs subf
+        self.assertEqual(regex.sub(r'(test1)|(test2)', r'matched: \1\2', 'test1'), 'matched: test1')
+        self.assertEqual(regex.subf(r'(test1)|(test2)', r'matched: {1}{2}', 'test1'), 'matched: test1')
+        self.assertEqual(regex.search(r'(test1)|(test2)', 'matched: test1').expand(r'matched: \1\2'), 'matched: test1'),
+        self.assertEqual(regex.search(r'(test1)|(test2)', 'matched: test1').expandf(r'matched: {1}{2}'), 'matched: test1')
+
+        # Git issue 442: Fuzzy regex matching doesn't seem to test insertions correctly
+        self.assertEqual(regex.search(r"(?:\bha\b){i:[ ]}", "having"), None)
+        self.assertEqual(regex.search(r"(?:\bha\b){i:[ ]}", "having", flags=regex.I), None)
+
     def test_fuzzy_ext(self):
         self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')),
           True)
diff --git a/regex_3/_regex.c b/regex_3/_regex.c
index ef7cbf16ade42f2b70290bfa81e44edbf1892ef5..5e3f51b74c5830ca8b0b2f0e560b2c81d296c46b 100644
--- a/regex_3/_regex.c
+++ b/regex_3/_regex.c
@@ -9655,7 +9655,7 @@ Py_LOCAL_INLINE(int) check_fuzzy_partial(RE_State* state, Py_ssize_t text_pos)
     return RE_ERROR_FAILURE;
 }
 
-/* Records a change in a fuzzy change. */
+/* Records a fuzzy change. */
 Py_LOCAL_INLINE(BOOL) record_fuzzy(RE_State* state, RE_UINT8 fuzzy_type,
   Py_ssize_t text_pos) {
     RE_FuzzyChangesList* change_list;
@@ -10281,7 +10281,7 @@ Py_LOCAL_INLINE(int) retry_fuzzy_insert(RE_State* state, RE_Node** node) {
 
     if (state->text_pos == limit || !insertion_permitted(state,
       state->fuzzy_node, state->fuzzy_counts) || !fuzzy_ext_match(state,
-      curr_node->nonstring.next_2.node, state->text_pos)) {
+      state->fuzzy_node, state->text_pos)) {
         while (count > 0) {
             unrecord_fuzzy(state);
             --state->fuzzy_counts[RE_FUZZY_INS];
@@ -15341,9 +15341,9 @@ backtrack:
              * bstack: -
              */
 
-            if (insertion_permitted(state, inner_node, inner_counts) && 
-              total_errors(state->fuzzy_counts) + total_errors(inner_counts) < 
-              state->max_errors && fuzzy_ext_match(state, inner_node, 
+            if (insertion_permitted(state, inner_node, inner_counts) &&
+              total_errors(state->fuzzy_counts) + total_errors(inner_counts) <
+              state->max_errors && fuzzy_ext_match(state, inner_node,
               state->text_pos)) {
                 RE_INT8 step;
                 Py_ssize_t limit;
@@ -15410,6 +15410,10 @@ backtrack:
              */
 
             inner_counts[RE_FUZZY_INS] -= insertions;
+            while (insertions > 0) {
+                unrecord_fuzzy(state);
+                --insertions;
+            }
 
             /* Restore the inner fuzzy info. */
             Py_MEMCPY(state->fuzzy_counts, inner_counts,
@@ -21153,11 +21157,17 @@ static void capture_dealloc(PyObject* self_) {
 static PyObject* capture_str(PyObject* self_) {
     CaptureObject* self;
     MatchObject* match;
+    PyObject* default_value;
+    PyObject* result;
 
     self = (CaptureObject*)self_;
     match = *self->match_indirect;
 
-    return match_get_group_by_index(match, self->group_index, Py_None);
+    default_value = PySequence_GetSlice(match->string, 0, 0);
+    result = match_get_group_by_index(match, self->group_index, default_value);
+    Py_DECREF(default_value);
+
+    return result;
 }
 
 static PyMemberDef splitter_members[] = {
@@ -24772,171 +24782,6 @@ Py_LOCAL_INLINE(int) build_REF_GROUP(RE_CompileArgs* args) {
     return RE_ERROR_SUCCESS;
 }
 
-Py_LOCAL_INLINE(BOOL) section_contains_repeat(RE_CODE** code, RE_CODE*
-  end_code);
-
-/* Checks whether a subsection of code contains a repeat. */
-Py_LOCAL_INLINE(BOOL) subsection_contains_repeat(int initial, RE_CODE** code,
-  RE_CODE* end_code) {
-    /* codes: code*initial, ..., (next, ..., )* end. */
-    *code += initial;
-
-    if (*code >= end_code)
-        return FALSE;
-
-    if (section_contains_repeat(code, end_code))
-        return TRUE;
-
-    while (*code < end_code && **code == RE_OP_NEXT) {
-        ++(*code);
-
-        if (*code >= end_code)
-            return FALSE;
-
-        if (section_contains_repeat(code, end_code))
-            return TRUE;
-    }
-
-    if (*code >= end_code)
-        return FALSE;
-
-    ++(*code);
-
-    return FALSE;
-}
-
-/* Checks whether a section of code contains a repeat. */
-Py_LOCAL_INLINE(BOOL) section_contains_repeat(RE_CODE** code, RE_CODE*
-  end_code) {
-    while (*code < end_code) {
-        /* The following code groups opcodes by format, not function. */
-        switch ((*code)[0]) {
-        case RE_OP_ANY:
-        case RE_OP_ANY_ALL:
-        case RE_OP_ANY_ALL_REV:
-        case RE_OP_ANY_REV:
-        case RE_OP_ANY_U:
-        case RE_OP_ANY_U_REV:
-        case RE_OP_FAILURE:
-        case RE_OP_PRUNE:
-        case RE_OP_SUCCESS:
-            /* codes: opcode. */
-            ++(*code);
-            break;
-        case RE_OP_ATOMIC:
-        case RE_OP_BRANCH:
-        case RE_OP_GROUP_EXISTS:
-            /* codes: opcode, ..., (next, ..., )* end. */
-            if (subsection_contains_repeat(1, code, end_code))
-                return TRUE;
-            break;
-        case RE_OP_BOUNDARY:
-        case RE_OP_CALL_REF:
-        case RE_OP_DEFAULT_BOUNDARY:
-        case RE_OP_DEFAULT_END_OF_WORD:
-        case RE_OP_DEFAULT_START_OF_WORD:
-        case RE_OP_END_OF_LINE:
-        case RE_OP_END_OF_LINE_U:
-        case RE_OP_END_OF_STRING:
-        case RE_OP_END_OF_STRING_LINE:
-        case RE_OP_END_OF_STRING_LINE_U:
-        case RE_OP_END_OF_WORD:
-        case RE_OP_GRAPHEME_BOUNDARY:
-        case RE_OP_GROUP_CALL:
-        case RE_OP_KEEP:
-        case RE_OP_SEARCH_ANCHOR:
-        case RE_OP_SKIP:
-        case RE_OP_START_OF_LINE:
-        case RE_OP_START_OF_LINE_U:
-        case RE_OP_START_OF_STRING:
-        case RE_OP_START_OF_WORD:
-            /* codes: opcode, value. */
-            *code += 2;
-            break;
-        case RE_OP_CHARACTER:
-        case RE_OP_CHARACTER_IGN:
-        case RE_OP_CHARACTER_IGN_REV:
-        case RE_OP_CHARACTER_REV:
-        case RE_OP_PROPERTY:
-        case RE_OP_PROPERTY_IGN:
-        case RE_OP_PROPERTY_IGN_REV:
-        case RE_OP_PROPERTY_REV:
-        case RE_OP_REF_GROUP:
-        case RE_OP_REF_GROUP_FLD:
-        case RE_OP_REF_GROUP_FLD_REV:
-        case RE_OP_REF_GROUP_IGN:
-        case RE_OP_REF_GROUP_IGN_REV:
-        case RE_OP_REF_GROUP_REV:
-            /* codes: opcode, value, value. */
-            *code += 3;
-            break;
-        case RE_OP_CONDITIONAL:
-        case RE_OP_FUZZY:
-        case RE_OP_LOOKAROUND:
-            /* codes: opcode, value, value, ..., (next, ..., )* end. */
-            if (subsection_contains_repeat(3, code, end_code))
-                return TRUE;
-            break;
-        case RE_OP_GREEDY_REPEAT:
-        case RE_OP_LAZY_REPEAT:
-            return TRUE;
-        case RE_OP_GROUP:
-            /* codes: opcode, value, value, value, ..., (next, ..., )* end. */
-            if (subsection_contains_repeat(4, code, end_code))
-                return TRUE;
-            break;
-        case RE_OP_RANGE:
-        case RE_OP_RANGE_IGN:
-        case RE_OP_RANGE_IGN_REV:
-        case RE_OP_RANGE_REV:
-            /* codes: opcode, value, value, value. */
-            *code += 4;
-            break;
-        case RE_OP_SET_DIFF:
-        case RE_OP_SET_DIFF_IGN:
-        case RE_OP_SET_DIFF_IGN_REV:
-        case RE_OP_SET_DIFF_REV:
-        case RE_OP_SET_INTER:
-        case RE_OP_SET_INTER_IGN:
-        case RE_OP_SET_INTER_IGN_REV:
-        case RE_OP_SET_INTER_REV:
-        case RE_OP_SET_SYM_DIFF:
-        case RE_OP_SET_SYM_DIFF_IGN:
-        case RE_OP_SET_SYM_DIFF_IGN_REV:
-        case RE_OP_SET_SYM_DIFF_REV:
-        case RE_OP_SET_UNION:
-        case RE_OP_SET_UNION_IGN:
-        case RE_OP_SET_UNION_IGN_REV:
-        case RE_OP_SET_UNION_REV:
-            /* codes: opcode, value, ..., (next, ..., )* end. */
-            if (subsection_contains_repeat(2, code, end_code))
-                return TRUE;
-            break;
-        case RE_OP_STRING:
-        case RE_OP_STRING_FLD:
-        case RE_OP_STRING_FLD_REV:
-        case RE_OP_STRING_IGN:
-        case RE_OP_STRING_IGN_REV:
-        case RE_OP_STRING_REV:
-            /* codes: opcode, value, length, .... */
-            *code += 3 + (*code)[2];
-            break;
-        default:
-            /* We've found an opcode which we don't recognise. We'll leave it
-             * for the caller.
-             */
-            return FALSE;
-        }
-    }
-
-    return FALSE;
-}
-
-/* Checks whether a section of code contains a repeat. */
-Py_LOCAL_INLINE(BOOL) contains_repeat(RE_CODE* code, RE_CODE* end_code) {
-    return section_contains_repeat(&code, end_code);
-}
-
 /* Builds a REPEAT node. */
 Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) {
     BOOL greedy;
diff --git a/regex_3/_regex_core.py b/regex_3/_regex_core.py
index 93a110b9bed9c7438ff6f795b87c1a0c9bf1f646..033753e7924bfe3ee60b8c578a6a309e79ab8373 100644
--- a/regex_3/_regex_core.py
+++ b/regex_3/_regex_core.py
@@ -453,7 +453,7 @@ def parse_sequence(source, info):
                     sequence.append(None)
                 else:
                     # It's not a quantifier. Maybe it's a fuzzy constraint.
-                    constraints = parse_fuzzy(source, info, ch)
+                    constraints = parse_fuzzy(source, info, ch, case_flags)
                     if constraints:
                         # It _is_ a fuzzy constraint.
                         apply_constraint(source, info, constraints, case_flags,
@@ -570,7 +570,7 @@ def parse_limited_quantifier(source):
 
     return min_count, max_count
 
-def parse_fuzzy(source, info, ch):
+def parse_fuzzy(source, info, ch, case_flags):
     "Parses a fuzzy setting, if present."
     saved_pos = source.pos
 
@@ -587,7 +587,7 @@ def parse_fuzzy(source, info, ch):
         return None
 
     if source.match(":"):
-        constraints["test"] = parse_fuzzy_test(source, info)
+        constraints["test"] = parse_fuzzy_test(source, info, case_flags)
 
     if not source.match("}"):
         raise error("expected }", source.string, source.pos)
@@ -735,7 +735,7 @@ def parse_cost_term(source, cost):
 
     cost[ch] = int(coeff or 1)
 
-def parse_fuzzy_test(source, info):
+def parse_fuzzy_test(source, info, case_flags):
     saved_pos = source.pos
     ch = source.get()
     if ch in SPECIAL_CHARS:
diff --git a/regex_3/regex.py b/regex_3/regex.py
index 0795a9422a6d1d3e85e2b6897da4358619ddaef2..2ab8675af525b6c235c41b1026e4e3d9498a385a 100644
--- a/regex_3/regex.py
+++ b/regex_3/regex.py
@@ -241,7 +241,7 @@ __all__ = ["cache_all", "compile", "DEFAULT_VERSION", "escape", "findall",
   "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
   "__doc__"]
 
-__version__ = "2.5.103"
+__version__ = "2.5.109"
 
 # --------------------------------------------------------------------
 # Public interface.
diff --git a/regex_3/test_regex.py b/regex_3/test_regex.py
index 6d3f7740281cffd20ee763121efe4ef12a08df50..f40b790f284ed0970f16f72cea1ff6b32bac3a65 100644
--- a/regex_3/test_regex.py
+++ b/regex_3/test_regex.py
@@ -4198,6 +4198,10 @@ thing
           'x right').capturesdict(), {'mydef': ['right'], 'wrong': [], 'right':
           ['right']})
 
+        # Hg issue 338: specifying allowed characters when fuzzy-matching
+        self.assertEqual(bool(regex.match(r'(?:cat){e<=1:[u]}', 'cut')), True)
+        self.assertEqual(bool(regex.match(r'(?:cat){e<=1:u}', 'cut')), True)
+
         # Hg issue 353: fuzzy changes negative indexes
         self.assertEqual(regex.search(r'(?be)(AGTGTTCCCCGCGCCAGCGGGGATAAACCG){s<=5,i<=5,d<=5,s+i+d<=10}',
           'TTCCCCGCGCCAGCGGGGATAAACCG').fuzzy_changes, ([], [], [0, 1, 3, 5]))
@@ -4284,8 +4288,8 @@ thing
         self.assertEqual(p.search('1 month 10 hours ago').group(), '10 hours ago')
 
         # Git issue 427: Possible bug with BESTMATCH
-        sequence ='TTCAGACGTGTGCTCTTCCGATCTCAATACCGACTCCTCACTGTGTGTCT'
-        pattern = '(?P<insert>.*)(?P<anchor>CTTCC){e<=1}(?P<umi>([ACGT]){4,6})(?P<sid>CAATACCGACTCCTCACTGTGT){e<=2}(?P<end>([ACGT]){0,6}$)'
+        sequence = 'TTCAGACGTGTGCTCTTCCGATCTCAATACCGACTCCTCACTGTGTGTCT'
+        pattern = r'(?P<insert>.*)(?P<anchor>CTTCC){e<=1}(?P<umi>([ACGT]){4,6})(?P<sid>CAATACCGACTCCTCACTGTGT){e<=2}(?P<end>([ACGT]){0,6}$)'
 
         m = regex.match(pattern, sequence, flags=regex.BESTMATCH)
         self.assertEqual(m.span(), (0, 50))
@@ -4295,6 +4299,29 @@ thing
         self.assertEqual(m.span(), (0, 50))
         self.assertEqual(m.groupdict(), {'insert': 'TTCAGACGTGTGCT', 'anchor': 'CTTCC', 'umi': 'GATCT', 'sid': 'CAATACCGACTCCTCACTGTGT', 'end': 'GTCT'})
 
+        # Git issue 433: Disagreement between fuzzy_counts and fuzzy_changes
+        pattern = r'(?P<insert>.*)(?P<anchor>AACACTGG){e<=1}(?P<umi>([AT][CG]){5}){e<=2}(?P<sid>GTAACCGAAG){e<=2}(?P<end>([ACGT]){0,6}$)'
+
+        sequence = 'GGAAAACACTGGTCTCAGTCTCGTAACCGAAGTGGTCG'
+        m = regex.match(pattern, sequence, flags=regex.BESTMATCH)
+        self.assertEqual(m.fuzzy_counts, (0, 0, 0))
+        self.assertEqual(m.fuzzy_changes, ([], [], []))
+
+        sequence = 'GGAAAACACTGGTCTCAGTCTCGTCCCCGAAGTGGTCG'
+        m = regex.match(pattern, sequence, flags=regex.BESTMATCH)
+        self.assertEqual(m.fuzzy_counts, (2, 0, 0))
+        self.assertEqual(m.fuzzy_changes, ([24, 25], [], []))
+
+        # Git issue 439: Unmatched groups: sub vs subf
+        self.assertEqual(regex.sub(r'(test1)|(test2)', r'matched: \1\2', 'test1'), 'matched: test1')
+        self.assertEqual(regex.subf(r'(test1)|(test2)', r'matched: {1}{2}', 'test1'), 'matched: test1')
+        self.assertEqual(regex.search(r'(test1)|(test2)', 'matched: test1').expand(r'matched: \1\2'), 'matched: test1'),
+        self.assertEqual(regex.search(r'(test1)|(test2)', 'matched: test1').expandf(r'matched: {1}{2}'), 'matched: test1')
+
+        # Git issue 442: Fuzzy regex matching doesn't seem to test insertions correctly
+        self.assertEqual(regex.search(r"(?:\bha\b){i:[ ]}", "having"), None)
+        self.assertEqual(regex.search(r"(?:\bha\b){i:[ ]}", "having", flags=regex.I), None)
+
     def test_fuzzy_ext(self):
         self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')),
           True)
diff --git a/setup.cfg b/setup.cfg
index 4927abe7a921f670cafa74f837bfed057ae40350..8bfd5a12f85b8fbb6c058cf67dd23da690835ea0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,4 @@
-[egg_info]
-tag_build = 
-tag_date = 0
-
+[egg_info]
+tag_build = 
+tag_date = 0
+
diff --git a/setup.py b/setup.py
index 9a287d86e74977d808619b6703a80262983dff3f..a78c50ce4d74c0645288ef796f9dbb72f57a1993 100644
--- a/setup.py
+++ b/setup.py
@@ -12,12 +12,13 @@ with open('README.rst') as file:
 
 setup(
     name='regex',
-    version='2021.10.8',
+    version='2021.11.10',
     description='Alternative regular expression module, to replace re.',
     long_description=long_description,
+    long_description_content_type='text/x-rst',
     author='Matthew Barnett',
     author_email='regex@mrabarnett.plus.com',
-    url='https://bitbucket.org/mrabarnett/mrab-regex',
+    url='https://github.com/mrabarnett/mrab-regex',
     license='Apache Software License',
 
     classifiers=[
diff --git a/tools/build_regex_unicode.py b/tools/build_regex_unicode.py
index 7f376e5310faad31953b7dce67c6fa6c75007199..b8da0b689769a0444c97d00902e33fab946a9fc9 100644
--- a/tools/build_regex_unicode.py
+++ b/tools/build_regex_unicode.py
@@ -1,1777 +1,1777 @@
-#! python3.9
-# -*- coding: utf-8 -*-
-#
-# This Python script parses the Unicode data files and generates the C files
-# for the regex module.
-#
-# Written by MRAB.
-#
-from contextlib import suppress
-from itertools import chain
-from os import listdir, mkdir
-from os.path import basename, dirname, exists, join, normpath
-from urllib.parse import urljoin
-from urllib.request import urlretrieve
-from time import time
-
-import codecs
-import sys
-sys.stdout = codecs.getwriter('utf-8')(sys.stdout.detach())
-
-class Timed:
-    def __init__(self, message=None):
-        self._message = message
-
-    def __enter__(self):
-        self._start = time()
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        finish = time()
-        elapsed = finish - self._start
-
-        if self._message is None:
-            print(f'Took {elapsed:0.2f} secs')
-        else:
-            print(f'{self._message} took {elapsed:0.2f} secs')
-
-        return False
-
-def unique(iterable, key=None):
-
-    if key is None:
-        def key(item):
-            return item
-
-    seen = set()
-
-    for item in iterable:
-        k = key(item)
-
-        if k not in seen:
-            seen.add(k)
-            yield item
-
-class IterRanges:
-    def __init__(self, ranges):
-        self._ranges = ranges
-        self._pos = 0
-        self._update()
-
-    def next(self):
-        if self._pos >= len(self._ranges):
-            return
-
-        self._pos += 1
-        self._update()
-
-    def _update(self):
-        if self._pos < len(self._ranges):
-            self.lower, self.upper = self._ranges[self._pos]
-        else:
-            self.lower = self.upper = NUM_CODEPOINTS
-
-class Ranges:
-    def __init__(self, initial=None):
-        self._ranges = []
-
-        if initial is not None:
-            self._ranges.extend(initial)
-
-        self._is_normalised = initial is None
-
-    def add(self, lower, upper=None):
-        if upper is None:
-            self._ranges.append((lower, lower))
-        else:
-            self._ranges.append((lower, upper))
-
-        self._is_normalised = False
-
-    def __or__(self, other):
-        return Ranges(self._ranges + other._ranges)
-
-    def __sub__(self, other):
-        self._normalise()
-        other._normalise()
-
-        include = IterRanges(self._ranges)
-        exclude = IterRanges(other._ranges)
-        new_ranges = []
-
-        lower = include.lower
-
-        while lower < NUM_CODEPOINTS:
-            if lower < include.lower:
-                # We're below the current include range.
-                # Advance into the range.
-                lower = include.lower
-            elif lower > include.upper:
-                # We're above the current include range.
-                # Advance into the next include range.
-                include.next()
-                lower = max(lower, include.lower)
-            elif lower < exclude.lower:
-                # We're below the current exclude range.
-                # Accept codepoints as far as the end of the include range.
-                upper = min(include.upper, exclude.lower - 1)
-                new_ranges.append((lower, upper))
-                lower = upper + 1
-            elif lower > exclude.upper:
-                # We're above the current exclude range.
-                exclude.next()
-            else:
-                # We're within both the include and exclude ranges.
-                # Advance out of the overlap.
-                upper = min(include.upper, exclude.upper)
-                lower = upper + 1
-
-        return Ranges(new_ranges)
-
-    def __iter__(self):
-        self._normalise()
-
-        return iter(self._ranges)
-
-    def __len__(self):
-        self._normalise()
-
-        return len(self._ranges)
-
-    def lowest(self):
-        self._normalise()
-
-        return self._ranges[0][0]
-
-    def __repr__(self):
-        self._normalise()
-
-        return 'Ranges({!r})'.format(self._ranges)
-
-    def _normalise(self):
-        if self._is_normalised:
-            return
-
-        if len(self._ranges) >= 2:
-            self._ranges.sort()
-
-            new_ranges = []
-            lower, upper = self._ranges[0]
-
-            for l, u in self._ranges[1 : ]:
-                if l - upper > 1:
-                    new_ranges.append((lower, upper))
-                    lower, upper = l, u
-                else:
-                    lower = min(lower, l)
-                    upper = max(upper, u)
-
-            new_ranges.append((lower, upper))
-
-            self._ranges = new_ranges
-
-        self._is_normalised = True
-
-munge_dict = str.maketrans({'-': '', '_': '', ' ': ''})
-
-def munge(value):
-    munged_value = value.translate(munge_dict).upper()
-
-    if value.startswith('-'):
-        munged_value = '-' + munged_value
-
-    return munged_value
-
-def download_unicode_files(unicode_data_base, data_files, data_folder):
-    for section in data_files.values():
-        for rel_path in section:
-            path = normpath(join(data_folder, basename(rel_path)))
-
-            if not exists(path):
-                url = urljoin(unicode_data_base, rel_path)
-                print('Downloading {} from {}'.format(rel_path, url),
-                  flush=True)
-                urlretrieve(url, path)
-
-def parse_property_aliases(data_folder):
-    properties = {}
-
-    path = join(data_folder, 'PropertyAliases.txt')
-
-    with open(path, encoding='utf-8') as file:
-        for line in file:
-            line = line.strip()
-
-            if not line or line.startswith('#'):
-                continue
-
-            fields = [field.strip() for field in line.split(';')]
-            prop_name = fields.pop(1)
-
-            property = {'names': list(unique([prop_name] + fields, key=munge))}
-
-            for name in property['names']:
-                properties[munge(name)] = property
-
-    return properties
-
-def parse_value_aliases(data_folder, properties):
-    path = join(data_folder, 'PropertyValueAliases.txt')
-
-    with open(path, encoding='utf-8') as file:
-        for line in file:
-            line = line.strip()
-
-            if not line or line.startswith('#'):
-                continue
-
-            line = line.partition('#')[0]
-            fields = [field.strip() for field in line.split(';')]
-            prop_name = fields.pop(0)
-            val_name = fields.pop(2 if prop_name == 'ccc' else 1)
-
-            property = properties[munge(prop_name)]
-            value = {'names': list(unique([val_name] + fields, key=munge))}
-            values = property.setdefault('values', {})
-
-            for name in value['names']:
-                values[munge(name)] = value
-
-def parse_binary(properties, path):
-    with open(path, encoding='utf-8') as file:
-        for line in file:
-            line = line.strip()
-
-            if not line or line.startswith('#'):
-                continue
-
-            line = line.partition('#')[0]
-            fields = [field.strip() for field in line.split(';')]
-            codepoints = [int(part, 16) for part in fields[0].split('..')]
-            prop_name = fields[1]
-            property = properties[munge(prop_name)]
-            property.setdefault('default', munge('No'))
-            value = property['values'][munge('Yes')]
-            value.setdefault('codepoints', Ranges()).add(codepoints[0],
-              codepoints[-1])
-
-def parse_emoji(properties, path):
-    with open(path, encoding='utf-8') as file:
-        for line in file:
-            line = line.strip()
-
-            if not line:
-                continue
-
-            if line.startswith('# @missing:'):
-                fields = line.split()
-                prop_name = fields[-3]
-
-                try:
-                    property = properties[munge(prop_name)]
-                except KeyError:
-                    property = {'names': [prop_name], 'values': {}}
-                    value = {'names': ['No', 'N']}
-                    property['values'][munge(value['names'][0])] = value
-                    value = {'names': ['Yes', 'Y']}
-                    property['values'][munge(value['names'][0])] = value
-                    properties[munge(prop_name)] = property
-
-                default = fields[-1]
-                property['default'] = munge(default)
-            elif not line.startswith('#'):
-                line = line.partition('#')[0]
-                fields = [field.strip() for field in line.split(';')]
-                codepoints = [int(part, 16) for part in fields[0].split('..')]
-                prop_name = fields[1]
-                property = properties[munge(prop_name)]
-                property.setdefault('default', munge('No'))
-
-                try:
-                    value = property['values'][munge('Yes')]
-                except KeyError:
-                    value = {'names': ['Yes']}
-                    property['values'][munge('Yes')] = value
-
-                value.setdefault('codepoints', Ranges()).add(codepoints[0],
-                  codepoints[-1])
-
-def parse_multivalue(properties, path):
-    with open(path, encoding='utf-8') as file:
-        for line in file:
-            line = line.strip()
-
-            if not line:
-                continue
-
-            if line.startswith('# Property:'):
-                prop_name = line.split()[-1]
-                property = properties[munge(prop_name)]
-            elif line.startswith('#  All code points not explicitly listed for'):
-                prop_name = line.split()[-1]
-                property = properties[munge(prop_name)]
-            elif line.startswith('# @missing:'):
-                default = line.split()[-1]
-                property['default'] = munge(default)
-            elif not line.startswith('#'):
-                line = line.partition('#')[0]
-                fields = [field.strip() for field in line.split(';')]
-                codepoints = [int(part, 16) for part in fields[0].split('..')]
-                val_name = fields[1]
-                value = property['values'][munge(val_name)]
-                value.setdefault('codepoints', Ranges()).add(codepoints[0],
-                  codepoints[-1])
-
-def parse_normalisation(properties, path):
-    property = None
-
-    with open(path, encoding='utf-8') as file:
-        for line in file:
-            line = line.strip()
-
-            if not line:
-                continue
-
-            if line.startswith('# Derived Property:'):
-                property = None
-            elif line.startswith('# Property:'):
-                prop_name = line.split()[-1]
-                property = properties[munge(prop_name)]
-            elif property:
-                if line.startswith('# @missing:'):
-                    default = line.split()[-1]
-                    property['default'] = munge(default)
-                elif not line.startswith('#'):
-                    line = line.partition('#')[0]
-                    fields = [field.strip() for field in line.split(';')]
-                    codepoints = [int(part, 16) for part in
-                      fields[0].split('..')]
-                    val_name = fields[2]
-
-                    value = property['values'][munge(val_name)]
-                    value.setdefault('codepoints', Ranges()).add(codepoints[0],
-                      codepoints[-1])
-
-def parse_numeric_values(properties, path):
-    with open(path, encoding='utf-8') as file:
-        for line in file:
-            line = line.strip()
-
-            if not line:
-                continue
-
-            if line.startswith('# Derived Property:'):
-                prop_name = line.split()[-1]
-                property = properties[munge(prop_name)]
-                default = {'names': ['NaN']}
-                property['values'] = {munge('NaN'): default}
-                property['default'] = munge('NaN')
-            elif line.startswith('# @missing:'):
-                default = line.split()[-1]
-                property['default'] = munge(default)
-            elif not line.startswith('#'):
-                line = line.partition('#')[0]
-                fields = [field.strip() for field in line.split(';')]
-                codepoints = [int(part, 16) for part in fields[0].split('..')]
-                val_name = fields[3]
-
-                try:
-                    value = property['values'][munge(val_name)]
-                except KeyError:
-                    value = {'names': [val_name]}
-                    property['values'][munge(val_name)] = value
-
-                value.setdefault('codepoints', Ranges()).add(codepoints[0],
-                  codepoints[-1])
-
-def parse_script_extensions(properties, path):
-    with open(path, encoding='utf-8') as file:
-        for line in file:
-            line = line.strip()
-
-            if not line:
-                continue
-
-            if line.startswith('# Property:'):
-                prop_name = line.split()[-1]
-                property = properties[munge(prop_name)]
-                property['values'] = {}
-            elif not line.startswith('#'):
-                line = line.partition('#')[0]
-                fields = [field.strip() for field in line.split(';')]
-                codepoints = [int(part, 16) for part in fields[0].split('..')]
-
-                key = tuple(sorted(fields[1].split(), key=str.lower))
-
-                try:
-                    value = property['values'][key]
-                except KeyError:
-                    value = {'codepoints': Ranges()}
-                    property['values'][key] = value
-
-                value['codepoints'].add(codepoints[0], codepoints[-1])
-
-def parse_case_folding(properties, path):
-    simple_folding = {}
-    full_folding = {}
-    turkic_set = set()
-
-    with open(path, encoding='utf-8') as file:
-        for line in file:
-            line = line.strip()
-
-            if not line or line.startswith('#'):
-                continue
-
-            line = line.partition('#')[0]
-            fields = line.split(';')
-            codepoint = int(fields[0], 16)
-            kind = fields[1].strip()
-            folded = [int(part, 16) for part in fields[2].split()]
-            delta = folded[0] ^ codepoint
-
-            if kind in {'S', 'C', 'T'}:
-                simple_folding.setdefault(delta, Ranges()).add(codepoint,
-                  codepoint)
-
-            if kind in {'F', 'C', 'T'}:
-                key = tuple([delta] + folded[1 : ])
-                full_folding.setdefault(key, Ranges()).add(codepoint,
-                  codepoint)
-
-            if kind == 'T':
-                turkic_set.add((codepoint, tuple(folded)))
-
-    # Is the Turkic set what we expected?
-    if turkic_set != {(0x49, (0x131, )), (0x130, (0x69, ))}:
-        raise ValueError('Turkic set has changed')
-
-    properties['simple_folding'] = simple_folding
-    properties['full_folding'] = full_folding
-
-def parse_unicode_files(data_files, data_folder):
-    properties = parse_property_aliases(data_folder)
-    parse_value_aliases(data_folder, properties)
-
-    def ignore(*args):
-        pass
-
-    parsers = {
-        'aliases': ignore,
-        'binary': parse_binary,
-        'emoji': parse_emoji,
-        'multivalue': parse_multivalue,
-        'normalisation': parse_normalisation,
-        'numeric_values': parse_numeric_values,
-        'script_extensions': parse_script_extensions,
-        'case_folding': parse_case_folding,
-    }
-
-    for section, rel_paths in data_files.items():
-        parse = parsers[section]
-
-        for rel_path in rel_paths:
-            data_file = basename(rel_path)
-
-            print('Parsing {}'.format(data_file), flush=True)
-            parse(properties, join(data_folder, data_file))
-
-    unicode_data = {'properties': {}}
-
-    for prop_name, property in properties.items():
-        if has_codepoints(property):
-            unicode_data['properties'][prop_name] = property
-        elif prop_name in {'simple_folding', 'full_folding'}:
-            unicode_data[prop_name] = property
-
-    properties = unicode_data['properties']
-    property = properties[munge('General_Category')]
-    property['default'] = munge('Unassigned')
-
-    values = property['values']
-
-    for val_name, value in list(values.items()):
-        if len(val_name) == 1:
-            new_name = val_name.upper() + '&'
-            values[munge(new_name)] = value
-            value['names'].append(new_name)
-
-    return unicode_data
-
-def make_binary_property(properties, names, codepoints):
-    no_value = {'names': ['No', 'N', 'F', 'False']}
-    yes_value = {'names': ['Yes', 'Y', 'T', 'True'], 'codepoints': codepoints}
-    values = {}
-
-    for value in [no_value, yes_value]:
-        for name in value['names']:
-            values[munge(name)] = value
-
-    property = {'names': names, 'values': values, 'default': munge('No')}
-
-    for name in names:
-        properties[munge(name)] = property
-
-def make_additional_properties(unicode_data):
-
-    def get_values(prop_name):
-        return properties[munge(prop_name)]['values']
-
-    def get_codepoints(prop_name, val_name):
-        return get_values(prop_name)[munge(val_name)]['codepoints']
-
-    properties = unicode_data['properties']
-
-    # Make the 'Alphanumeric' property.
-    alphabetic = get_codepoints('Alphabetic', 'Yes')
-    decimal_number = get_codepoints('General_Category', 'Decimal_Number')
-
-    make_binary_property(properties, ['Alphanumeric', 'AlNum'], alphabetic |
-      decimal_number)
-
-    # Make the 'Any' property.
-    make_binary_property(properties, ['Any'], Ranges([(0, NUM_CODEPOINTS -
-      1)]))
-
-    # General_Category has a compound value called 'Assigned'.
-    assigned = Ranges()
-
-    for value in unique(get_values('General_Category').values(), key=id):
-        if value['names'][0] != 'Unassigned':
-            try:
-                assigned |= value['codepoints']
-            except KeyError:
-                pass
-
-    value = {'names': ['Assigned']}
-    properties[munge('General_Category')]['values'][munge('Assigned')] = value
-
-    # Make the 'Blank' property.
-    space_separator = get_codepoints('General_Category', 'Space_Separator')
-    blank = Ranges([(0x09, 0x09)]) | space_separator
-
-    make_binary_property(properties, ['Blank'], blank)
-
-    # Make the 'Graph' property.
-    whitespace = get_codepoints('White_Space', 'Yes')
-    control = get_codepoints('General_Category', 'Control')
-    surrogate = get_codepoints('General_Category', 'Surrogate')
-
-    graph = assigned - (whitespace | control | surrogate)
-
-    make_binary_property(properties, ['Graph'], graph)
-
-    # Make the 'Print' property.
-    print_ = (graph | blank) - control
-
-    make_binary_property(properties, ['Print'], print_)
-
-    # Make the 'Word' property.
-    enclosing_mark = get_codepoints('General_Category', 'Enclosing_Mark')
-    nonspacing_mark = get_codepoints('General_Category', 'Nonspacing_Mark')
-    spacing_mark = get_codepoints('General_Category', 'Spacing_Mark')
-    connector_punctuation = get_codepoints('General_Category',
-      'Connector_Punctuation')
-    join_control = get_codepoints('Join_Control', 'Yes')
-
-    word = (alphabetic | enclosing_mark | nonspacing_mark | spacing_mark |
-      decimal_number | connector_punctuation | join_control)
-
-    make_binary_property(properties, ['Word'], word)
-
-    # Make the 'XDigit' property.
-    hex_digit = get_codepoints('Hex_Digit', 'Yes')
-
-    xdigit = decimal_number | hex_digit
-
-    make_binary_property(properties, ['XDigit'], xdigit)
-
-    # Make the 'Posix_Digit' property.
-    posix_digit = Ranges([(ord('0'), ord('9'))])
-
-    make_binary_property(properties, ['Posix_Digit'], posix_digit)
-
-    # Make the 'Posix_AlNum' property.
-    posix_alnum = alphabetic | posix_digit
-
-    make_binary_property(properties, ['Posix_AlNum'], posix_alnum)
-
-    # Make the 'Posix_Punct' property.
-    punctuation = Ranges()
-
-    for name in 'Pd Ps Pe Pc Po Pi Pf'.split():
-        punctuation |= get_codepoints('General_Category', name)
-
-    symbol = Ranges()
-
-    for name in 'Sm Sc Sk So '.split():
-        symbol |= get_codepoints('General_Category', name)
-
-    posix_punct = (punctuation | symbol) - alphabetic
-
-    make_binary_property(properties, ['Posix_Punct'], posix_punct)
-
-    # Make the 'Posix_XDigit' property.
-    posix_xdigit = Ranges([(ord('0'), ord('9')), (ord('A'), ord('F')),
-      (ord('a'), ord('f'))])
-
-    make_binary_property(properties, ['Posix_XDigit'], posix_xdigit)
-
-def preferred(d):
-    return munge(d['names'][0])
-
-def has_codepoints(property):
-    if 'values' not in property:
-        return False
-
-    return any('codepoints' in value for value in property['values'].values())
-
-def write_summary(unicode_data, tools_folder):
-    print('Writing summary')
-
-    properties = unicode_data['properties']
-
-    path = join(tools_folder, 'Unicode.txt')
-
-    with open(path, 'w', encoding='ascii') as file:
-        file.write('Version {}\n'.format(UNICODE_VERSION))
-
-        for property in sorted(unique(properties.values(), key=id),
-          key=preferred):
-            if not has_codepoints(property):
-                print(property['names'][0])
-                continue
-
-            file.write('Property {}\n'.format(' '.join(property['names'])))
-
-            values = property['values']
-
-            if property['names'][0] == 'Script_Extensions':
-                for key in sorted(values):
-                    value = values[key]
-                    file.write('Value {}\n'.format(' '.join(key)))
-
-                    for lower, upper in value.get('codepoints', []):
-                        if lower == upper:
-                            file.write('{:04X}\n'.format(lower))
-                        else:
-                            file.write('{:04X}..{:04X}\n'.format(lower, upper))
-            else:
-                if 'default' in property:
-                    default = values[property['default']]
-                    file.write('DefaultValue {}\n'.format(default['names'][0]))
-
-                for value in sorted(unique(values.values(), key=id),
-                  key=preferred):
-                    file.write('Value {}\n'.format(' '.join(value['names'])))
-
-                    for lower, upper in value.get('codepoints', []):
-                        if lower == upper:
-                            file.write('{:04X}\n'.format(lower))
-                        else:
-                            file.write('{:04X}..{:04X}\n'.format(lower, upper))
-
-        file.write('SimpleFolding\n')
-
-        for delta, ranges in unicode_data['simple_folding'].items():
-            file.write('Value {:04X}\n'.format(delta))
-
-            for lower, upper in ranges:
-                if lower == upper:
-                    file.write('{:04X}\n'.format(lower))
-                else:
-                    file.write('{:04X}..{:04X}\n'.format(lower, upper))
-
-        file.write('FullFolding\n')
-
-        for key, ranges in unicode_data['full_folding'].items():
-            file.write('Value {}\n'.format(' '.join('{:04X}'.format(value) for
-              value in key)))
-
-            for lower, upper in ranges:
-                if lower == upper:
-                    file.write('{:04X}\n'.format(lower))
-                else:
-                    file.write('{:04X}..{:04X}\n'.format(lower, upper))
-
-def make_binary_dict():
-    binary_dict = {}
-
-    for n in range(0x100):
-        key = tuple(map(int, format(n, '08b')[ : : -1]))
-        binary_dict[key] = n
-
-    return binary_dict
-
-def collect_strings(properties):
-    strings = []
-
-    for property in properties.values():
-        try:
-            strings.extend(property['names'])
-
-            for value in property['values'].values():
-                strings.extend(value['names'])
-        except KeyError:
-            pass
-
-    return sorted(set(munge(string) for string in strings))
-
-def chunked(iterable, chunk_size):
-    sequence = iterable
-    count = len(sequence)
-
-    for start in range(0, count, chunk_size):
-        chunk = sequence[start : start +  chunk_size]
-        yield chunk
-
-def determine_entry_type(iterable):
-    lower, upper = min(iterable), max(iterable)
-
-    if 0 <= lower <= upper <= 0xFF:
-        return 'RE_UINT8'
-
-    if 0 <= lower <= upper <= 0xFFFF:
-        return 'RE_UINT16'
-
-    raise ValueError('cannot determine C type for {}..{}'.format(lower, upper))
-
-def is_binary(property):
-    return sum(1 for val in val_list if val['id'] != 0) == 1
-
-def count_ranges(property):
-    count = 0
-    default_id = property['values'][munge(property['default'])]['id']
-
-    for value in unique(property['values'].values(), key=id):
-        if value['id'] != default_id:
-            count += len(value.get('codepoints', []))
-
-    return count
-
-def generate_small_lookup(property, c_file):
-    c_file.write('''
-/* {}. */
-RE_UINT32 re_get_{}(RE_UINT32 codepoint) {{
-'''.format(property['names'][0], property['names'][0].lower()))
-
-    default_id = property['values'][munge(property['default'])]['id']
-    ranges = []
-
-    for value in unique(property['values'].values(), key=id):
-        if value['id'] != default_id:
-            val_id = value['id']
-
-            for lower, upper in value.get('codepoints', []):
-                ranges.append((lower, upper, val_id))
-
-    if len(ranges) == 1 and ranges[0][ : 2] == (0, NUM_CODEPOINTS - 1):
-        c_file.write('    return {};\n}}\n'.format(ranges[0][2]))
-    else:
-        for lower, upper, val_id in ranges:
-            width = 2 if upper <= 0xFF else 4 if upper <= 0xFFFF else 6
-
-            if lower == upper:
-                c_file.write('''\
-    if (codepoint == 0x{:0{width}X})
-        return {};
-'''.format(lower, val_id, width=width))
-            else:
-                c_file.write('''\
-    if (0x{:0{width}X} <= codepoint && codepoint <= 0x{:0{width}X})
-        return {};
-'''.format(lower, upper, val_id, width=width))
-
-        c_file.write('\n    return {};\n}}\n'.format(default_id))
-
-def generate_table(table_name, values, c_file, max_columns=16, public=False):
-    entry_type = determine_entry_type(values)
-
-    if public:
-        c_file.write('{} {}[] = {{\n'.format(entry_type, table_name))
-    else:
-        c_file.write('static {} {}[] = {{\n'.format(entry_type, table_name))
-
-    entries = [str(value) for value in values]
-    max_width = max(len(entry) for entry in entries)
-    entries = [entry.rjust(max_width) + ',' for entry in entries]
-    entries[-1] = entries[-1].rstrip(',')
-
-    for chunk in chunked(entries, max_columns):
-        c_file.write('    %s\n' % ' '.join(chunk))
-
-    c_file.write('};\n')
-
-def generate_lookup(property, c_file):
-    val_list = list(unique(property['values'].values(), key=id))
-
-    if count_ranges(property) <= 8:
-        generate_small_lookup(property, c_file)
-        return
-
-    default_id = property['values'][munge(property['default'])]['id']
-    entries = [default_id] * NUM_CODEPOINTS
-
-    for value in val_list:
-        val_id = value['id']
-
-        for lower, upper in value.get('codepoints', []):
-            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)
-
-    CHUNK_SIZE = 32
-
-    indexes = []
-    chunks = {}
-
-    for chunk in chunked(tuple(entries), CHUNK_SIZE):
-        indexes.append(chunks.setdefault(chunk, len(chunks)))
-
-    table_2 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    entries = indexes
-    indexes = []
-    chunks = {}
-
-    for start in range(0, len(entries), CHUNK_SIZE):
-        chunk = tuple(entries[start : start + CHUNK_SIZE])
-        indexes.append(chunks.setdefault(chunk, len(chunks)))
-
-    table_1 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    table_0 = indexes
-
-    c_file.write('\n/* {}. */\n'.format(property['names'][0]))
-
-    prop_name = property['names'][0].lower()
-    binary = set(table_2) == {0, 1}
-
-    for i, table in enumerate([table_0, table_1, table_2]):
-        if i == 2 and binary:
-            binary = True
-            entries = []
-
-            for start in range(0, len(table), 8):
-                entries.append(binary_dict[tuple(table[start : start + 8])])
-
-            table = entries
-
-        if i > 0:
-            c_file.write('\n')
-
-        generate_table('re_{}_table_{}'.format(prop_name, 1 + i), table,
-          c_file)
-
-    if binary:
-        c_file.write('''
-RE_UINT32 re_get_{0}(RE_UINT32 codepoint) {{
-    RE_UINT32 field_2;
-    RE_UINT32 field_1;
-    RE_UINT32 field_0;
-    RE_UINT32 offset;
-    RE_UINT32 v;
-
-    field_2 = codepoint >> 10;
-    field_1 = (codepoint >> 5) & 0x1F;
-    field_0 = (codepoint >> 3) & 0x3;
-    offset = codepoint & 0x7;
-
-    v = re_{0}_table_1[field_2];
-    v = re_{0}_table_2[(v << 5) | field_1];
-    v = re_{0}_table_3[(v << 2) | field_0];
-
-    return (v >> offset) & 0x1;
-}}
-'''.format(prop_name))
-    else:
-        c_file.write('''
-RE_UINT32 re_get_{0}(RE_UINT32 codepoint) {{
-    RE_UINT32 field_2;
-    RE_UINT32 field_1;
-    RE_UINT32 field_0;
-    RE_UINT32 v;
-
-    field_2 = codepoint >> 10;
-    field_1 = (codepoint >> 5) & 0x1F;
-    field_0 = codepoint & 0x1F;
-
-    v = re_{0}_table_1[field_2];
-    v = re_{0}_table_2[(v << 5) | field_1];
-    v = re_{0}_table_3[(v << 5) | field_0];
-
-    return v;
-}}
-'''.format(prop_name))
-
-def generate_script_extensions_lookup(properties, property, c_file):
-    entries = [0] * NUM_CODEPOINTS
-
-    # Initialise with script.
-    val_list = unique(properties[munge('Script')]['values'].values(), key=id)
-
-    for value in val_list:
-        val_id = value['id']
-
-        for lower, upper in value.get('codepoints', []):
-            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)
-
-    script_count = 1 + max(value['id'] for value in
-      properties[munge('Script')]['values'].values())
-
-    val_list = unique(property['values'].values(), key=id)
-
-    for value in val_list:
-        val_id = value['id']
-
-        for lower, upper in value.get('codepoints', []):
-            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)
-
-    CHUNK_SIZE = 32
-
-    indexes = []
-    chunks = {}
-
-    for chunk in chunked(entries, CHUNK_SIZE):
-        indexes.append(chunks.setdefault(tuple(chunk), len(chunks)))
-
-    table_2 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    entries = indexes
-    indexes = []
-    chunks = {}
-
-    for start in range(0, len(entries), CHUNK_SIZE):
-        chunk = tuple(entries[start : start + CHUNK_SIZE])
-        indexes.append(chunks.setdefault(chunk, len(chunks)))
-
-    table_1 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    table_0 = indexes
-
-    c_file.write('\n/* {}. */\n'.format(property['names'][0]))
-
-    prop_name = property['names'][0].lower()
-
-    for i, table in enumerate([table_0, table_1, table_2]):
-        generate_table('{}_table_{}'.format(prop_name, 1 + i), table, c_file)
-
-    script_values = properties[munge('Script')]['values']
-    ext_dict = {}
-
-    for key, value in property['values'].items():
-        ext_dict[value['id']] = [script_values[munge(name)]['id'] for name in
-          key]
-
-    offsets = []
-    entries = []
-
-    for key, value in sorted(ext_dict.items()):
-        offsets.append(len(entries))
-        entries.extend(value + [0])
-
-    generate_table('{}_table_4'.format(prop_name), offsets, c_file)
-
-    generate_table('{}_table_5'.format(prop_name), entries, c_file)
-
-    c_file.write('''
-int re_get_{0}(RE_UINT32 codepoint, RE_UINT8* scripts) {{
-    RE_UINT32 field_2;
-    RE_UINT32 field_1;
-    RE_UINT32 field_0;
-    RE_UINT32 v;
-    int offset;
-    int count;
-
-    field_2 = codepoint >> 10;
-    field_1 = (codepoint >> 5) & 0x1F;
-    field_0 = codepoint & 0x1F;
-
-    v = {0}_table_1[field_2];
-    v = {0}_table_2[(v << 5) | field_1];
-    v = {0}_table_3[(v << 5) | field_0];
-
-    if (v < {1}) {{
-        scripts[0] = v;
-
-        return 1;
-    }}
-
-    offset = {0}_table_4[v - {1}];
-    count = 0;
-
-    do {{
-        scripts[count] = {0}_table_5[offset + count];
-        ++count;
-    }} while ({0}_table_5[offset + count] != 0);
-
-    return count;
-}}
-'''.format(prop_name, script_count))
-
-def generate_all_cases(unicode_data, c_file):
-    simple_folding = unicode_data['simple_folding']
-
-    all_cases = {}
-
-    for delta, ranges in simple_folding.items():
-        for lower, upper in ranges:
-            for codepoint in range(lower, upper + 1):
-                folded = codepoint ^ delta
-                all_cases.setdefault(folded, set()).update({codepoint, folded})
-
-    for codepoint in list(all_cases):
-        cases = {codepoint} | all_cases.get(codepoint, set())
-
-        for c in list(cases):
-            cases |= all_cases.get(c, set())
-
-        for c in cases:
-            all_cases[c] = cases
-
-    all_cases[0x49] = {0x49, 0x69, 0x131} # Dotless capital I.
-    all_cases[0x69] = {0x69, 0x49, 0x130} # Dotted small I.
-    all_cases[0x130] = {0x130, 0x69} # Dotted capital I.
-    all_cases[0x131] = {0x131, 0x49} # Dotless small I.
-
-    entries = [0] * NUM_CODEPOINTS
-    others_dict = {(0, ): 0}
-
-    for codepoint, cases in all_cases.items():
-        others = sorted(cases - {codepoint})
-        key = tuple([others[0] ^ codepoint] + others[1 : ])
-        entries[codepoint] = others_dict.setdefault(key, len(others_dict))
-
-    CHUNK_SIZE = 32
-
-    indexes = []
-    chunks = {}
-
-    for chunk in chunked(entries, CHUNK_SIZE):
-        indexes.append(chunks.setdefault(tuple(chunk), len(chunks)))
-
-    table_2 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    entries = indexes
-    indexes = []
-    chunks = {}
-
-    for start in range(0, len(entries), CHUNK_SIZE):
-        chunk = tuple(entries[start : start + CHUNK_SIZE])
-        indexes.append(chunks.setdefault(chunk, len(chunks)))
-
-    table_1 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    table_0 = indexes
-
-    c_file.write('\n/* All cases. */\n')
-
-    for i, table in enumerate([table_0, table_1, table_2]):
-        if i > 0:
-            c_file.write('\n')
-
-        generate_table('re_all_cases_table_{}'.format(1 + i), table, c_file)
-
-    c_file.write('\nstatic RE_AllCases re_all_cases_table_4[] = {\n')
-
-    max_columns = max(len(value) for value in others_dict)
-
-    max_width = max(len(str(item)) for value in others_dict for item in value)
-    fmt = '    {{{:%d}, {{' % max_width + ', '.join(['{:%d}' % max_width] *
-      (max_columns -1)) + '}}}},\n'
-
-    lines = []
-
-    for values in sorted(others_dict, key=others_dict.get):
-        values = list(values) + [0] * max_columns
-        lines.append(fmt.format(*values))
-
-    lines[-1] = lines[-1].rstrip(',\n') + '\n'
-
-    c_file.writelines(lines)
-
-    c_file.write('};\n')
-
-    c_file.write('''
-int re_get_all_cases(RE_UINT32 codepoint, RE_UINT32* cases) {
-    RE_UINT32 field_2;
-    RE_UINT32 field_1;
-    RE_UINT32 field_0;
-    RE_UINT32 v;
-
-    field_2 = codepoint >> 10;
-    field_1 = (codepoint >> 5) & 0x1F;
-    field_0 = codepoint & 0x1F;
-
-    v = re_all_cases_table_1[field_2];
-    v = re_all_cases_table_2[(v << 5) | field_1];
-    v = re_all_cases_table_3[(v << 5) | field_0];
-
-    cases[0] = codepoint;
-
-    if (re_all_cases_table_4[v].delta == 0)
-        return 1;
-
-    cases[1] = codepoint ^ re_all_cases_table_4[v].delta;
-
-    if (re_all_cases_table_4[v].others[0] == 0)
-        return 2;
-
-    cases[2] = re_all_cases_table_4[v].others[0];
-
-    if (re_all_cases_table_4[v].others[1] == 0)
-        return 3;
-
-    cases[3] = re_all_cases_table_4[v].others[1];
-
-    return 4;
-}
-''')
-
-def generate_simple_case_folding(unicode_data, c_file):
-    simple_folding = unicode_data['simple_folding']
-
-    entries = [0] * NUM_CODEPOINTS
-    value_dict = {0: 0}
-
-    for delta, ranges in sorted(simple_folding.items()):
-        val_id = value_dict.setdefault(delta, len(value_dict))
-
-        for lower, upper in ranges:
-            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)
-
-    CHUNK_SIZE = 32
-
-    indexes = []
-    chunks = {}
-
-    for chunk in chunked(entries, CHUNK_SIZE):
-        indexes.append(chunks.setdefault(tuple(chunk), len(chunks)))
-
-    table_2 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    entries = indexes
-    indexes = []
-    chunks = {}
-
-    for start in range(0, len(entries), CHUNK_SIZE):
-        chunk = tuple(entries[start : start + CHUNK_SIZE])
-        indexes.append(chunks.setdefault(chunk, len(chunks)))
-
-    table_1 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    table_0 = indexes
-
-    c_file.write('\n/* Simple case folding. */\n')
-
-    for i, table in enumerate([table_0, table_1, table_2]):
-        if i > 0:
-            c_file.write('\n')
-
-        generate_table('re_simple_folding_table_{}'.format(1 + i), table, c_file)
-
-    c_file.write('\nstatic RE_UINT16 re_simple_folding_table_4[] = {\n')
-
-    entries = [str(value) for value in sorted(value_dict, key=value_dict.get)]
-    max_width = max(len(entry) for entry in entries)
-    entries = [entry.rjust(max_width) + ',' for  entry in entries]
-    entries[-1] = entries[-1].rstrip(',')
-
-    for chunk in chunked(entries, 8):
-        c_file.write('    %s\n' % ' '.join(chunk))
-
-    c_file.write('};\n')
-
-    c_file.write('''
-RE_UINT32 re_get_simple_case_folding(RE_UINT32 codepoint) {
-    RE_UINT32 field_2;
-    RE_UINT32 field_1;
-    RE_UINT32 field_0;
-    RE_UINT32 v;
-
-    field_2 = codepoint >> 10;
-    field_1 = (codepoint >> 5) & 0x1F;
-    field_0 = codepoint & 0x1F;
-
-    v = re_simple_folding_table_1[field_2];
-    v = re_simple_folding_table_2[(v << 5) | field_1];
-    v = re_simple_folding_table_3[(v << 5) | field_0];
-
-    return codepoint ^ re_simple_folding_table_4[v];
-}
-''')
-
-def generate_full_case_folding(unicode_data, c_file):
-    full_folding = unicode_data['full_folding']
-
-    entries = [0] * NUM_CODEPOINTS
-    value_dict = {(0, ): 0}
-
-    for delta, ranges in sorted(full_folding.items()):
-        val_id = value_dict.setdefault(delta, len(value_dict))
-
-        for lower, upper in ranges:
-            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)
-
-    CHUNK_SIZE = 32
-
-    indexes = []
-    chunks = {}
-
-    for chunk in chunked(entries, CHUNK_SIZE):
-        indexes.append(chunks.setdefault(tuple(chunk), len(chunks)))
-
-    table_2 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    entries = indexes
-    indexes = []
-    chunks = {}
-
-    for start in range(0, len(entries), CHUNK_SIZE):
-        chunk = tuple(entries[start : start + CHUNK_SIZE])
-        indexes.append(chunks.setdefault(chunk, len(chunks)))
-
-    table_1 = list(chain(*sorted(chunks, key=chunks.get)))
-
-    table_0 = indexes
-
-    c_file.write('\n/* Full case folding. */\n')
-
-    for i, table in enumerate([table_0, table_1, table_2]):
-        if i > 0:
-            c_file.write('\n')
-
-        generate_table('re_full_folding_table_{}'.format(1 + i), table, c_file)
-
-    c_file.write('\nstatic RE_FullCaseFolding re_full_folding_table_4[] = {\n')
-
-    max_folded = max(len(value) for value in value_dict)
-    max_width = max(len(str(item)) for value in value_dict for item in value)
-    rows = [(value + (0, ) * max_folded)[ : max_folded] for value in
-      sorted(value_dict, key=value_dict.get)]
-    fmt = ('    {{{{' + ', '.join(['{:%d}' % max_width] * max_folded) +
-      '}}}},\n').format
-    lines = []
-
-    for row in rows:
-        lines.append(fmt(*row))
-
-    lines[-1] = lines[-1].rstrip(',\n') + '\n'
-
-    c_file.writelines(lines)
-
-    c_file.write('};\n')
-
-    c_file.write('''
-int re_get_full_case_folding(RE_UINT32 codepoint, RE_UINT32* folded) {
-    RE_UINT32 field_2;
-    RE_UINT32 field_1;
-    RE_UINT32 field_0;
-    RE_UINT32 v;
-    RE_UINT16* data;
-
-    field_2 = codepoint >> 10;
-    field_1 = (codepoint >> 5) & 0x1F;
-    field_0 = codepoint & 0x1F;
-
-    v = re_full_folding_table_1[field_2];
-    v = re_full_folding_table_2[(v << 5) | field_1];
-    v = re_full_folding_table_3[(v << 5) | field_0];
-
-    data = re_full_folding_table_4[v].data;
-    folded[0] = codepoint ^ data[0];
-
-    if (data[1] == 0)
-        return 1;
-
-    folded[1] = data[1];
-
-    if (data[2] == 0)
-        return 2;
-
-    folded[2] = data[2];
-
-    return 3;
-}
-''')
-
-def generate_code(unicode_data, tools_folder):
-    print('Generating code')
-
-    # Codepoints that expand on full casefolding.
-    expanded = []
-
-    for key, ranges in unicode_data['full_folding'].items():
-        if len(key) > 1:
-            for lower, upper in ranges:
-                expanded.extend(range(lower, upper + 1))
-
-    expanded.sort()
-
-    # Assign the property and value IDs.
-    properties = unicode_data['properties']
-    prop_list = list(unique(properties.values(), key=id))
-    prop_list.sort(key=preferred)
-
-    unicode_data['property_tablee_count'] = len(properties)
-    unicode_data['property_count'] = len(prop_list)
-
-    no_yes_maybe = {
-        'NO', 'N', 'FALSE', 'F',
-        'YES', 'Y', 'TRUE', 'T',
-        'MAYBE', 'M',
-    }
-
-    yes_no_maybe_dict = {'No': 0, 'Yes': 1, 'Maybe': 2}
-
-    for prop_id, property in enumerate(prop_list):
-        property['id'] = prop_id
-
-        if property['names'][0] == 'Script_Extensions':
-            script_count = 1 + max(val['id'] for val in
-              properties[munge('Script')]['values'].values())
-
-            def make_key(value):
-                return value['codepoints'].lowest()
-
-            val_list = list(unique(property['values'].values(), key=id))
-            val_list.sort(key=make_key)
-
-            for val_id, value in enumerate(val_list):
-                value['id'] = script_count + val_id
-        else:
-            default = property['default']
-
-            if not (set(property['values']) - no_yes_maybe):
-
-                def make_key(value):
-                    return yes_no_maybe_dict[value['names'][0]]
-
-            else:
-
-                def make_key(value):
-                    if munge(value['names'][0]) == default:
-                        return (0, )
-
-                    if 'codepoints' not in value:
-                        return (2, )
-
-                    return 1, value['codepoints'].lowest()
-
-            val_list = list(unique(property['values'].values(), key=id))
-            val_list.sort(key=make_key)
-
-            def make_key(val):
-                name_list = [name for name in val['names'] if '&' in name]
-
-                if name_list:
-                    return 1, name_list[0][0]
-
-                return 0
-
-            if property['names'][0] == 'General_Category':
-
-                def make_key(value):
-                    for name in value['names']:
-                        if '&' in name:
-                            return (1, name)
-
-                    if value.get('codepoints'):
-                        return (0, )
-
-                    return (2, munge(value['names'][0]))
-
-                for val_id, value in enumerate(sorted(val_list, key=make_key)):
-                    value['id'] = val_id
-            else:
-                for val_id, value in enumerate(val_list):
-                    value['id'] = val_id
-
-    # Collect the value sets.
-    valueset_dict = {}
-
-    for property in sorted(prop_list, key=lambda prop: prop['id']):
-        prop_name = property['names'][0]
-
-        if prop_name == 'Script_Extensions':
-            property['valueset_id'] = properties[munge('Script')]['valueset_id']
-        else:
-            valueset = []
-
-            val_list = list(unique(property['values'].values(), key=id))
-
-            for value in sorted(val_list, key=lambda val: val['id']):
-                valueset.append(tuple(value['names']))
-
-            valueset_id = valueset_dict.setdefault(tuple(valueset),
-              len(valueset_dict))
-            property['valueset_id'] = valueset_id
-
-    strings = collect_strings(properties)
-
-    c_path = join(tools_folder, 'unicode.c')
-    h_path = join(tools_folder, 'unicode.h')
-
-    with open(c_path, 'w', newline='\n', encoding='ascii') as c_file:
-        c_file.write('''\
-/* For Unicode version {} */
-
-#include "_regex_unicode.h"
-
-#define RE_BLANK_MASK ((1 << RE_PROP_ZL) | (1 << RE_PROP_ZP))
-#define RE_GRAPH_MASK ((1 << RE_PROP_CC) | (1 << RE_PROP_CS) | (1 << RE_PROP_CN))
-#define RE_WORD_MASK (RE_PROP_M_MASK | (1 << RE_PROP_ND) | (1 << RE_PROP_PC))
-
-typedef struct {{
-    RE_UINT8 scripts[RE_MAX_SCX];
-}} RE_ScriptExt;
-
-typedef struct {{
-    RE_UINT32 delta;
-    RE_UINT16 others[RE_MAX_CASES - 1];
-}} RE_AllCases;
-
-typedef struct {{
-    RE_UINT16 data[RE_MAX_FOLDED];
-}} RE_FullCaseFolding;
-
-/* Strings. */
-char* re_strings[] = {{
-'''.format(UNICODE_VERSION))
-
-        lines = []
-
-        for string in strings:
-            lines.append('    "{}",\n'.format(string))
-
-        strings_dict = {string: i for i, string in enumerate(strings)}
-
-        unicode_data['string_count'] = len(strings_dict)
-
-        c_file.writelines(lines)
-        c_file.write('''\
-};
-
-/* Properties. */
-RE_Property re_properties[] = {
-''')
-
-        for prop_id, property in enumerate(sorted(prop_list, key=lambda prop:
-          prop['id'])):
-            for name in property['names']:
-                c_file.write('    {{{:4}, {:2}, {:2}}}, /* {} */\n'.format(strings_dict[munge(name)],
-                  prop_id, property['valueset_id'], munge(name)))
-
-        c_file.write('''\
-};
-
-/* Property values. */
-RE_PropertyValue re_property_values[] = {
-''')
-
-        def make_key(names):
-            if any(len(name) == 2 for name in names):
-                return 0
-
-            return 1
-
-        gc_valset_id = properties[munge('General_Category')]['valueset_id']
-        count = 0
-
-        for valset, valset_id in sorted(valueset_dict.items(), key=lambda pair:
-          pair[1]):
-            if valset_id == gc_valset_id:
-                valset = sorted(valset, key=make_key)
-
-            for val_id, names in enumerate(valset):
-                for name in names:
-                    c_file.write('''    {{{:4}, {:2}, {:3}}}, /* {} */\n'''.format(strings_dict[munge(name)],
-                      valset_id, val_id, munge(name)))
-
-                count += len(names)
-
-        unicode_data['valueset_table_count'] = count
-
-        c_file.write('};\n')
-
-        c_file.write('''\n/* Codepoints which expand on full case-folding. */\n''')
-
-        unicode_data['expanded_count'] = len(expanded)
-        generate_table('re_expand_on_folding', expanded, c_file, max_columns=8, public=True)
-
-        for property in prop_list:
-            print('    {}'.format(property['names'][0]), flush=True)
-
-            if property['names'][0] == 'Script_Extensions':
-                generate_script_extensions_lookup(properties, property, c_file)
-            else:
-                generate_lookup(property, c_file)
-
-        print('    All cases', flush=True)
-        generate_all_cases(unicode_data, c_file)
-
-        print('    Simple case folding', flush=True)
-        generate_simple_case_folding(unicode_data, c_file)
-
-        print('    Full case folding', flush=True)
-        generate_full_case_folding(unicode_data, c_file)
-
-        c_file.write('''
-/* Property function table. */
-RE_GetPropertyFunc re_get_property[] = {
-''')
-
-        lines = []
-
-        for property in prop_list:
-            prop_name = property['names'][0].lower()
-
-            if prop_name == 'script_extensions':
-                lines.append('    0,\n')
-            else:
-                lines.append('    re_get_{},\n'.format(prop_name))
-
-        lines[-1] = lines[-1].rstrip(',\n') + '\n'
-
-        c_file.writelines(lines)
-
-        c_file.write('};\n')
-
-    with open(h_path, 'w', newline='\n', encoding='ascii') as h_file:
-        property = unicode_data['properties'][munge('Script_Extensions')]
-        max_scx = max(len(key) for key in property['values'])
-
-        h_file.write('''\
-typedef unsigned char RE_UINT8;
-typedef signed char RE_INT8;
-typedef unsigned short RE_UINT16;
-typedef signed short RE_INT16;
-typedef unsigned int RE_UINT32;
-typedef signed int RE_INT32;
-
-typedef unsigned char BOOL;
-#if !defined(FALSE) || !defined(TRUE)
-#define FALSE 0
-#define TRUE 1
-#endif
-
-#define RE_ASCII_MAX 0x7F
-#define RE_LOCALE_MAX 0xFF
-
-#define RE_MAX_CASES 4
-#define RE_MAX_FOLDED 3
-#define RE_MAX_SCX {}
-
-typedef struct RE_Property {{
-    RE_UINT16 name;
-    RE_UINT8 id;
-    RE_UINT8 value_set;
-}} RE_Property;
-
-typedef struct RE_PropertyValue {{
-    RE_UINT16 name;
-    RE_UINT8 value_set;
-    RE_UINT16 id;
-}} RE_PropertyValue;
-
-typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);
-'''.format(max_scx))
-
-        gc_id = properties[munge('General_Category')]['id']
-        cased_id = properties[munge('Cased')]['id']
-        upper_id = properties[munge('Uppercase')]['id']
-        lower_id = properties[munge('Lowercase')]['id']
-        scx_id = properties[munge('Script_Extensions')]['id']
-
-        h_file.write('''
-#define RE_PROP_GC 0x{:X}
-#define RE_PROP_CASED 0x{:X}
-#define RE_PROP_UPPERCASE 0x{:X}
-#define RE_PROP_LOWERCASE 0x{:X}
-#define RE_PROP_SCX 0x{:X}
-
-'''.format(gc_id, cased_id, upper_id, lower_id, scx_id))
-
-        gc_values = properties[munge('General_Category')]['values']
-        group_names = set('C L M N P S Z Assigned Cased_Letter'.split())
-
-        names = set(gc_values) & set(munge(name) for name in group_names)
-
-        for name in sorted(names, key=lambda name: gc_values[name]['id']):
-            h_file.write('#define RE_PROP_{} {}\n'.format(name,
-              gc_values[name]['id']))
-
-        h_file.write('\n')
-
-        val_list = []
-        masks = {}
-
-        for name in gc_values:
-            if len(name) != 2 or not name.isalpha():
-                continue
-
-            if not gc_values[name].get('codepoints'):
-                continue
-
-            val_id = gc_values[name]['id']
-            val_list.append((val_id, name))
-            masks.setdefault(name[0], 0)
-            masks[name[0]] |= 1 << val_id
-
-        for val_id, name in sorted(val_list):
-            h_file.write('#define RE_PROP_{} {}\n'.format(name, val_id))
-
-        h_file.write('\n')
-
-        for name, mask in sorted(masks.items()):
-            h_file.write('#define RE_PROP_{}_MASK 0x{:08X}\n'.format(name,
-              mask))
-
-        h_file.write('\n')
-
-        common = '''
-            Alnum Alpha Any Ascii Blank Cntrl Digit Graph Lower Print Space
-            Upper Word Xdigit Posix_Alnum Posix_Digit Posix_Punct Posix_Xdigit
-        '''
-
-        for name in common.split():
-            property = properties.get(munge(name))
-
-            if property is not None:
-                h_file.write('#define RE_PROP_{} 0x{:06X}\n'.format(name.upper(),
-                  (property['id'] << 16) | 1))
-            else:
-                for prop_name in ['GC', 'Script', 'Block']:
-                    property = properties[munge(prop_name)]
-                    value = property['values'].get(munge(name))
-
-                    if value is not None:
-                        h_file.write('#define RE_PROP_{} 0x{:06X}\n'.format(name.upper(),
-                          (property['id'] << 16) | value['id']))
-                        break
-
-        h_file.write('\n')
-
-        val_list = unique(properties[munge('Word_Break')]['values'].values(),
-          key=id)
-        values = [(value['id'], value['names'][0]) for value in val_list]
-
-        for val_id, name in sorted(values):
-            h_file.write('#define RE_WBREAK_{} {}\n'.format(munge(name),
-              val_id))
-
-        h_file.write('\n')
-
-        val_list = unique(properties[munge('Grapheme_Cluster_Break')]['values'].values(),
-          key=id)
-        values = [(value['id'], value['names'][0]) for value in val_list]
-
-        for val_id, name in sorted(values):
-            h_file.write('#define RE_GBREAK_{} {}\n'.format(munge(name),
-              val_id))
-
-        h_file.write('\n')
-
-        val_list = unique(properties[munge('Line_Break')]['values'].values(),
-          key=id)
-        values = [(value['id'], value['names'][0]) for value in val_list]
-
-        for val_id, name in sorted(values):
-            h_file.write('#define RE_LBREAK_{} {}\n'.format(munge(name),
-              val_id))
-
-        h_file.write('\n')
-
-        h_file.write('extern char* re_strings[{}];\n'.format(unicode_data['string_count']))
-        h_file.write('extern RE_Property re_properties[{}];\n'.format(unicode_data['property_tablee_count']))
-        h_file.write('extern RE_PropertyValue re_property_values[{}];\n'.format(unicode_data['valueset_table_count']))
-        h_file.write('extern RE_UINT16 re_expand_on_folding[{}];\n'.format(unicode_data['expanded_count']))
-        h_file.write('extern RE_GetPropertyFunc re_get_property[{}];\n'.format(unicode_data['property_count']))
-
-        h_file.write('\n')
-
-        for property in prop_list:
-            prop_name = property['names'][0]
-
-            if prop_name == 'Script_Extensions':
-                h_file.write('int re_get_{}(RE_UINT32 codepoint, RE_UINT8* scripts);\n'.format(prop_name.lower()))
-            else:
-                h_file.write('RE_UINT32 re_get_{}(RE_UINT32 codepoint);\n'.format(prop_name.lower()))
-
-        h_file.write('int re_get_all_cases(RE_UINT32 codepoint, RE_UINT32* cases);\n')
-        h_file.write('RE_UINT32 re_get_simple_case_folding(RE_UINT32 codepoint);\n')
-        h_file.write('int re_get_full_case_folding(RE_UINT32 codepoint, RE_UINT32* folded);\n')
-
-# Whether to update the Unicode data files from the Unicode website.
-UNICODE_VERSION = '14.0.0'
-
-# The URL from which the Unicode data files can be obtained.
-unicode_data_base = 'http://www.unicode.org/Public/UNIDATA/'
-
-NUM_CODEPOINTS = 0x110000
-
-# The Unicode data files. The file names are relative to the website URL.
-unicode_data_files = '''
-[aliases]
-PropertyAliases.txt
-PropertyValueAliases.txt
-[binary]
-PropList.txt
-extracted/DerivedBinaryProperties.txt
-DerivedCoreProperties.txt
-[emoji]
-emoji/emoji-data.txt
-[normalisation]
-DerivedNormalizationProps.txt
-[multivalue]
-auxiliary/GraphemeBreakProperty.txt
-auxiliary/SentenceBreakProperty.txt
-auxiliary/WordBreakProperty.txt
-Blocks.txt
-extracted/DerivedBidiClass.txt
-extracted/DerivedCombiningClass.txt
-extracted/DerivedDecompositionType.txt
-extracted/DerivedEastAsianWidth.txt
-extracted/DerivedGeneralCategory.txt
-extracted/DerivedJoiningGroup.txt
-extracted/DerivedJoiningType.txt
-extracted/DerivedLineBreak.txt
-extracted/DerivedNumericType.txt
-HangulSyllableType.txt
-IndicPositionalCategory.txt
-IndicSyllabicCategory.txt
-Scripts.txt
-[numeric_values]
-extracted/DerivedNumericValues.txt
-[case_folding]
-CaseFolding.txt
-[script_extensions]
-ScriptExtensions.txt
-'''
-
-data_files = {}
-section = ''
-
-for line in unicode_data_files.splitlines():
-    if line[ : 1] + line[-1 : ] == '[]':
-        section = line[1 : -1]
-    elif line:
-        data_files.setdefault(section, []).append(line)
-
-# The generated C files will be written into this folder.
-tools_folder = dirname(__file__)
-
-# The local folder in which the Unicode data files are stored.
-data_folder = join(tools_folder, 'unicode_data')
-
-with suppress(FileExistsError):
-    mkdir(data_folder)
-
-download_unicode_files(unicode_data_base, data_files, data_folder)
-
-unicode_data = parse_unicode_files(data_files, data_folder)
-make_additional_properties(unicode_data)
-write_summary(unicode_data, tools_folder)
-
-binary_dict = make_binary_dict()
-
-generate_code(unicode_data, tools_folder)
-
-print('\nFinished!')
+#! python3.9
+# -*- coding: utf-8 -*-
+#
+# This Python script parses the Unicode data files and generates the C files
+# for the regex module.
+#
+# Written by MRAB.
+#
+from contextlib import suppress
+from itertools import chain
+from os import listdir, mkdir
+from os.path import basename, dirname, exists, join, normpath
+from urllib.parse import urljoin
+from urllib.request import urlretrieve
+from time import time
+
+import codecs
+import sys
+sys.stdout = codecs.getwriter('utf-8')(sys.stdout.detach())
+
+class Timed:
+    def __init__(self, message=None):
+        self._message = message
+
+    def __enter__(self):
+        self._start = time()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        finish = time()
+        elapsed = finish - self._start
+
+        if self._message is None:
+            print(f'Took {elapsed:0.2f} secs')
+        else:
+            print(f'{self._message} took {elapsed:0.2f} secs')
+
+        return False
+
+def unique(iterable, key=None):
+
+    if key is None:
+        def key(item):
+            return item
+
+    seen = set()
+
+    for item in iterable:
+        k = key(item)
+
+        if k not in seen:
+            seen.add(k)
+            yield item
+
+class IterRanges:
+    def __init__(self, ranges):
+        self._ranges = ranges
+        self._pos = 0
+        self._update()
+
+    def next(self):
+        if self._pos >= len(self._ranges):
+            return
+
+        self._pos += 1
+        self._update()
+
+    def _update(self):
+        if self._pos < len(self._ranges):
+            self.lower, self.upper = self._ranges[self._pos]
+        else:
+            self.lower = self.upper = NUM_CODEPOINTS
+
+class Ranges:
+    def __init__(self, initial=None):
+        self._ranges = []
+
+        if initial is not None:
+            self._ranges.extend(initial)
+
+        self._is_normalised = initial is None
+
+    def add(self, lower, upper=None):
+        if upper is None:
+            self._ranges.append((lower, lower))
+        else:
+            self._ranges.append((lower, upper))
+
+        self._is_normalised = False
+
+    def __or__(self, other):
+        return Ranges(self._ranges + other._ranges)
+
+    def __sub__(self, other):
+        self._normalise()
+        other._normalise()
+
+        include = IterRanges(self._ranges)
+        exclude = IterRanges(other._ranges)
+        new_ranges = []
+
+        lower = include.lower
+
+        while lower < NUM_CODEPOINTS:
+            if lower < include.lower:
+                # We're below the current include range.
+                # Advance into the range.
+                lower = include.lower
+            elif lower > include.upper:
+                # We're above the current include range.
+                # Advance into the next include range.
+                include.next()
+                lower = max(lower, include.lower)
+            elif lower < exclude.lower:
+                # We're below the current exclude range.
+                # Accept codepoints as far as the end of the include range.
+                upper = min(include.upper, exclude.lower - 1)
+                new_ranges.append((lower, upper))
+                lower = upper + 1
+            elif lower > exclude.upper:
+                # We're above the current exclude range.
+                exclude.next()
+            else:
+                # We're within both the include and exclude ranges.
+                # Advance out of the overlap.
+                upper = min(include.upper, exclude.upper)
+                lower = upper + 1
+
+        return Ranges(new_ranges)
+
+    def __iter__(self):
+        self._normalise()
+
+        return iter(self._ranges)
+
+    def __len__(self):
+        self._normalise()
+
+        return len(self._ranges)
+
+    def lowest(self):
+        self._normalise()
+
+        return self._ranges[0][0]
+
+    def __repr__(self):
+        self._normalise()
+
+        return 'Ranges({!r})'.format(self._ranges)
+
+    def _normalise(self):
+        if self._is_normalised:
+            return
+
+        if len(self._ranges) >= 2:
+            self._ranges.sort()
+
+            new_ranges = []
+            lower, upper = self._ranges[0]
+
+            for l, u in self._ranges[1 : ]:
+                if l - upper > 1:
+                    new_ranges.append((lower, upper))
+                    lower, upper = l, u
+                else:
+                    lower = min(lower, l)
+                    upper = max(upper, u)
+
+            new_ranges.append((lower, upper))
+
+            self._ranges = new_ranges
+
+        self._is_normalised = True
+
+munge_dict = str.maketrans({'-': '', '_': '', ' ': ''})
+
+def munge(value):
+    munged_value = value.translate(munge_dict).upper()
+
+    if value.startswith('-'):
+        munged_value = '-' + munged_value
+
+    return munged_value
+
+def download_unicode_files(unicode_data_base, data_files, data_folder):
+    for section in data_files.values():
+        for rel_path in section:
+            path = normpath(join(data_folder, basename(rel_path)))
+
+            if not exists(path):
+                url = urljoin(unicode_data_base, rel_path)
+                print('Downloading {} from {}'.format(rel_path, url),
+                  flush=True)
+                urlretrieve(url, path)
+
+def parse_property_aliases(data_folder):
+    properties = {}
+
+    path = join(data_folder, 'PropertyAliases.txt')
+
+    with open(path, encoding='utf-8') as file:
+        for line in file:
+            line = line.strip()
+
+            if not line or line.startswith('#'):
+                continue
+
+            fields = [field.strip() for field in line.split(';')]
+            prop_name = fields.pop(1)
+
+            property = {'names': list(unique([prop_name] + fields, key=munge))}
+
+            for name in property['names']:
+                properties[munge(name)] = property
+
+    return properties
+
+def parse_value_aliases(data_folder, properties):
+    path = join(data_folder, 'PropertyValueAliases.txt')
+
+    with open(path, encoding='utf-8') as file:
+        for line in file:
+            line = line.strip()
+
+            if not line or line.startswith('#'):
+                continue
+
+            line = line.partition('#')[0]
+            fields = [field.strip() for field in line.split(';')]
+            prop_name = fields.pop(0)
+            val_name = fields.pop(2 if prop_name == 'ccc' else 1)
+
+            property = properties[munge(prop_name)]
+            value = {'names': list(unique([val_name] + fields, key=munge))}
+            values = property.setdefault('values', {})
+
+            for name in value['names']:
+                values[munge(name)] = value
+
+def parse_binary(properties, path):
+    with open(path, encoding='utf-8') as file:
+        for line in file:
+            line = line.strip()
+
+            if not line or line.startswith('#'):
+                continue
+
+            line = line.partition('#')[0]
+            fields = [field.strip() for field in line.split(';')]
+            codepoints = [int(part, 16) for part in fields[0].split('..')]
+            prop_name = fields[1]
+            property = properties[munge(prop_name)]
+            property.setdefault('default', munge('No'))
+            value = property['values'][munge('Yes')]
+            value.setdefault('codepoints', Ranges()).add(codepoints[0],
+              codepoints[-1])
+
+def parse_emoji(properties, path):
+    with open(path, encoding='utf-8') as file:
+        for line in file:
+            line = line.strip()
+
+            if not line:
+                continue
+
+            if line.startswith('# @missing:'):
+                fields = line.split()
+                prop_name = fields[-3]
+
+                try:
+                    property = properties[munge(prop_name)]
+                except KeyError:
+                    property = {'names': [prop_name], 'values': {}}
+                    value = {'names': ['No', 'N']}
+                    property['values'][munge(value['names'][0])] = value
+                    value = {'names': ['Yes', 'Y']}
+                    property['values'][munge(value['names'][0])] = value
+                    properties[munge(prop_name)] = property
+
+                default = fields[-1]
+                property['default'] = munge(default)
+            elif not line.startswith('#'):
+                line = line.partition('#')[0]
+                fields = [field.strip() for field in line.split(';')]
+                codepoints = [int(part, 16) for part in fields[0].split('..')]
+                prop_name = fields[1]
+                property = properties[munge(prop_name)]
+                property.setdefault('default', munge('No'))
+
+                try:
+                    value = property['values'][munge('Yes')]
+                except KeyError:
+                    value = {'names': ['Yes']}
+                    property['values'][munge('Yes')] = value
+
+                value.setdefault('codepoints', Ranges()).add(codepoints[0],
+                  codepoints[-1])
+
+def parse_multivalue(properties, path):
+    with open(path, encoding='utf-8') as file:
+        for line in file:
+            line = line.strip()
+
+            if not line:
+                continue
+
+            if line.startswith('# Property:'):
+                prop_name = line.split()[-1]
+                property = properties[munge(prop_name)]
+            elif line.startswith('#  All code points not explicitly listed for'):
+                prop_name = line.split()[-1]
+                property = properties[munge(prop_name)]
+            elif line.startswith('# @missing:'):
+                default = line.split()[-1]
+                property['default'] = munge(default)
+            elif not line.startswith('#'):
+                line = line.partition('#')[0]
+                fields = [field.strip() for field in line.split(';')]
+                codepoints = [int(part, 16) for part in fields[0].split('..')]
+                val_name = fields[1]
+                value = property['values'][munge(val_name)]
+                value.setdefault('codepoints', Ranges()).add(codepoints[0],
+                  codepoints[-1])
+
+def parse_normalisation(properties, path):
+    property = None
+
+    with open(path, encoding='utf-8') as file:
+        for line in file:
+            line = line.strip()
+
+            if not line:
+                continue
+
+            if line.startswith('# Derived Property:'):
+                property = None
+            elif line.startswith('# Property:'):
+                prop_name = line.split()[-1]
+                property = properties[munge(prop_name)]
+            elif property:
+                if line.startswith('# @missing:'):
+                    default = line.split()[-1]
+                    property['default'] = munge(default)
+                elif not line.startswith('#'):
+                    line = line.partition('#')[0]
+                    fields = [field.strip() for field in line.split(';')]
+                    codepoints = [int(part, 16) for part in
+                      fields[0].split('..')]
+                    val_name = fields[2]
+
+                    value = property['values'][munge(val_name)]
+                    value.setdefault('codepoints', Ranges()).add(codepoints[0],
+                      codepoints[-1])
+
+def parse_numeric_values(properties, path):
+    with open(path, encoding='utf-8') as file:
+        for line in file:
+            line = line.strip()
+
+            if not line:
+                continue
+
+            if line.startswith('# Derived Property:'):
+                prop_name = line.split()[-1]
+                property = properties[munge(prop_name)]
+                default = {'names': ['NaN']}
+                property['values'] = {munge('NaN'): default}
+                property['default'] = munge('NaN')
+            elif line.startswith('# @missing:'):
+                default = line.split()[-1]
+                property['default'] = munge(default)
+            elif not line.startswith('#'):
+                line = line.partition('#')[0]
+                fields = [field.strip() for field in line.split(';')]
+                codepoints = [int(part, 16) for part in fields[0].split('..')]
+                val_name = fields[3]
+
+                try:
+                    value = property['values'][munge(val_name)]
+                except KeyError:
+                    value = {'names': [val_name]}
+                    property['values'][munge(val_name)] = value
+
+                value.setdefault('codepoints', Ranges()).add(codepoints[0],
+                  codepoints[-1])
+
+def parse_script_extensions(properties, path):
+    with open(path, encoding='utf-8') as file:
+        for line in file:
+            line = line.strip()
+
+            if not line:
+                continue
+
+            if line.startswith('# Property:'):
+                prop_name = line.split()[-1]
+                property = properties[munge(prop_name)]
+                property['values'] = {}
+            elif not line.startswith('#'):
+                line = line.partition('#')[0]
+                fields = [field.strip() for field in line.split(';')]
+                codepoints = [int(part, 16) for part in fields[0].split('..')]
+
+                key = tuple(sorted(fields[1].split(), key=str.lower))
+
+                try:
+                    value = property['values'][key]
+                except KeyError:
+                    value = {'codepoints': Ranges()}
+                    property['values'][key] = value
+
+                value['codepoints'].add(codepoints[0], codepoints[-1])
+
+def parse_case_folding(properties, path):
+    simple_folding = {}
+    full_folding = {}
+    turkic_set = set()
+
+    with open(path, encoding='utf-8') as file:
+        for line in file:
+            line = line.strip()
+
+            if not line or line.startswith('#'):
+                continue
+
+            line = line.partition('#')[0]
+            fields = line.split(';')
+            codepoint = int(fields[0], 16)
+            kind = fields[1].strip()
+            folded = [int(part, 16) for part in fields[2].split()]
+            delta = folded[0] ^ codepoint
+
+            if kind in {'S', 'C', 'T'}:
+                simple_folding.setdefault(delta, Ranges()).add(codepoint,
+                  codepoint)
+
+            if kind in {'F', 'C', 'T'}:
+                key = tuple([delta] + folded[1 : ])
+                full_folding.setdefault(key, Ranges()).add(codepoint,
+                  codepoint)
+
+            if kind == 'T':
+                turkic_set.add((codepoint, tuple(folded)))
+
+    # Is the Turkic set what we expected?
+    if turkic_set != {(0x49, (0x131, )), (0x130, (0x69, ))}:
+        raise ValueError('Turkic set has changed')
+
+    properties['simple_folding'] = simple_folding
+    properties['full_folding'] = full_folding
+
+def parse_unicode_files(data_files, data_folder):
+    properties = parse_property_aliases(data_folder)
+    parse_value_aliases(data_folder, properties)
+
+    def ignore(*args):
+        pass
+
+    parsers = {
+        'aliases': ignore,
+        'binary': parse_binary,
+        'emoji': parse_emoji,
+        'multivalue': parse_multivalue,
+        'normalisation': parse_normalisation,
+        'numeric_values': parse_numeric_values,
+        'script_extensions': parse_script_extensions,
+        'case_folding': parse_case_folding,
+    }
+
+    for section, rel_paths in data_files.items():
+        parse = parsers[section]
+
+        for rel_path in rel_paths:
+            data_file = basename(rel_path)
+
+            print('Parsing {}'.format(data_file), flush=True)
+            parse(properties, join(data_folder, data_file))
+
+    unicode_data = {'properties': {}}
+
+    for prop_name, property in properties.items():
+        if has_codepoints(property):
+            unicode_data['properties'][prop_name] = property
+        elif prop_name in {'simple_folding', 'full_folding'}:
+            unicode_data[prop_name] = property
+
+    properties = unicode_data['properties']
+    property = properties[munge('General_Category')]
+    property['default'] = munge('Unassigned')
+
+    values = property['values']
+
+    for val_name, value in list(values.items()):
+        if len(val_name) == 1:
+            new_name = val_name.upper() + '&'
+            values[munge(new_name)] = value
+            value['names'].append(new_name)
+
+    return unicode_data
+
+def make_binary_property(properties, names, codepoints):
+    no_value = {'names': ['No', 'N', 'F', 'False']}
+    yes_value = {'names': ['Yes', 'Y', 'T', 'True'], 'codepoints': codepoints}
+    values = {}
+
+    for value in [no_value, yes_value]:
+        for name in value['names']:
+            values[munge(name)] = value
+
+    property = {'names': names, 'values': values, 'default': munge('No')}
+
+    for name in names:
+        properties[munge(name)] = property
+
+def make_additional_properties(unicode_data):
+
+    def get_values(prop_name):
+        return properties[munge(prop_name)]['values']
+
+    def get_codepoints(prop_name, val_name):
+        return get_values(prop_name)[munge(val_name)]['codepoints']
+
+    properties = unicode_data['properties']
+
+    # Make the 'Alphanumeric' property.
+    alphabetic = get_codepoints('Alphabetic', 'Yes')
+    decimal_number = get_codepoints('General_Category', 'Decimal_Number')
+
+    make_binary_property(properties, ['Alphanumeric', 'AlNum'], alphabetic |
+      decimal_number)
+
+    # Make the 'Any' property.
+    make_binary_property(properties, ['Any'], Ranges([(0, NUM_CODEPOINTS -
+      1)]))
+
+    # General_Category has a compound value called 'Assigned'.
+    assigned = Ranges()
+
+    for value in unique(get_values('General_Category').values(), key=id):
+        if value['names'][0] != 'Unassigned':
+            try:
+                assigned |= value['codepoints']
+            except KeyError:
+                pass
+
+    value = {'names': ['Assigned']}
+    properties[munge('General_Category')]['values'][munge('Assigned')] = value
+
+    # Make the 'Blank' property.
+    space_separator = get_codepoints('General_Category', 'Space_Separator')
+    blank = Ranges([(0x09, 0x09)]) | space_separator
+
+    make_binary_property(properties, ['Blank'], blank)
+
+    # Make the 'Graph' property.
+    whitespace = get_codepoints('White_Space', 'Yes')
+    control = get_codepoints('General_Category', 'Control')
+    surrogate = get_codepoints('General_Category', 'Surrogate')
+
+    graph = assigned - (whitespace | control | surrogate)
+
+    make_binary_property(properties, ['Graph'], graph)
+
+    # Make the 'Print' property.
+    print_ = (graph | blank) - control
+
+    make_binary_property(properties, ['Print'], print_)
+
+    # Make the 'Word' property.
+    enclosing_mark = get_codepoints('General_Category', 'Enclosing_Mark')
+    nonspacing_mark = get_codepoints('General_Category', 'Nonspacing_Mark')
+    spacing_mark = get_codepoints('General_Category', 'Spacing_Mark')
+    connector_punctuation = get_codepoints('General_Category',
+      'Connector_Punctuation')
+    join_control = get_codepoints('Join_Control', 'Yes')
+
+    word = (alphabetic | enclosing_mark | nonspacing_mark | spacing_mark |
+      decimal_number | connector_punctuation | join_control)
+
+    make_binary_property(properties, ['Word'], word)
+
+    # Make the 'XDigit' property.
+    hex_digit = get_codepoints('Hex_Digit', 'Yes')
+
+    xdigit = decimal_number | hex_digit
+
+    make_binary_property(properties, ['XDigit'], xdigit)
+
+    # Make the 'Posix_Digit' property.
+    posix_digit = Ranges([(ord('0'), ord('9'))])
+
+    make_binary_property(properties, ['Posix_Digit'], posix_digit)
+
+    # Make the 'Posix_AlNum' property.
+    posix_alnum = alphabetic | posix_digit
+
+    make_binary_property(properties, ['Posix_AlNum'], posix_alnum)
+
+    # Make the 'Posix_Punct' property.
+    punctuation = Ranges()
+
+    for name in 'Pd Ps Pe Pc Po Pi Pf'.split():
+        punctuation |= get_codepoints('General_Category', name)
+
+    symbol = Ranges()
+
+    for name in 'Sm Sc Sk So '.split():
+        symbol |= get_codepoints('General_Category', name)
+
+    posix_punct = (punctuation | symbol) - alphabetic
+
+    make_binary_property(properties, ['Posix_Punct'], posix_punct)
+
+    # Make the 'Posix_XDigit' property.
+    posix_xdigit = Ranges([(ord('0'), ord('9')), (ord('A'), ord('F')),
+      (ord('a'), ord('f'))])
+
+    make_binary_property(properties, ['Posix_XDigit'], posix_xdigit)
+
def preferred(d):
    """Sort key: the munged form of the entry's primary (first) name."""
    primary_name = d['names'][0]
    return munge(primary_name)
+
def has_codepoints(property):
    """Return True if any value of *property* carries a 'codepoints' entry."""
    if 'values' not in property:
        return False

    # Scan the distinct value records for at least one with codepoints.
    for value in property['values'].values():
        if 'codepoints' in value:
            return True

    return False
+
def write_summary(unicode_data, tools_folder):
    """Write a human-readable summary of the Unicode data to 'Unicode.txt'.

    Lists every property with its values and codepoint ranges, followed
    by the simple and full case-folding tables.
    """
    print('Writing summary')

    properties = unicode_data['properties']
    path = join(tools_folder, 'Unicode.txt')

    with open(path, 'w', encoding='ascii') as file:

        def write_ranges(ranges):
            # One line per range: a lone codepoint, or 'lower..upper'.
            for lower, upper in ranges:
                if lower == upper:
                    file.write('{:04X}\n'.format(lower))
                else:
                    file.write('{:04X}..{:04X}\n'.format(lower, upper))

        file.write('Version {}\n'.format(UNICODE_VERSION))

        for property in sorted(unique(properties.values(), key=id),
          key=preferred):
            if not has_codepoints(property):
                # Nothing to summarise; just report it on the console.
                print(property['names'][0])
                continue

            file.write('Property {}\n'.format(' '.join(property['names'])))

            values = property['values']

            if property['names'][0] == 'Script_Extensions':
                # Keys are tuples of script names here.
                for key in sorted(values):
                    file.write('Value {}\n'.format(' '.join(key)))
                    write_ranges(values[key].get('codepoints', []))
            else:
                if 'default' in property:
                    default = values[property['default']]
                    file.write('DefaultValue {}\n'.format(default['names'][0]))

                for value in sorted(unique(values.values(), key=id),
                  key=preferred):
                    file.write('Value {}\n'.format(' '.join(value['names'])))
                    write_ranges(value.get('codepoints', []))

        file.write('SimpleFolding\n')

        for delta, ranges in unicode_data['simple_folding'].items():
            file.write('Value {:04X}\n'.format(delta))
            write_ranges(ranges)

        file.write('FullFolding\n')

        for key, ranges in unicode_data['full_folding'].items():
            file.write('Value {}\n'.format(' '.join('{:04X}'.format(value) for
              value in key)))
            write_ranges(ranges)
+
def make_binary_dict():
    """Map every 8-tuple of bits (least-significant bit first) to its byte.

    Used to pack 8 one-bit table entries into a single byte.
    """
    return {tuple((n >> shift) & 1 for shift in range(8)): n
      for n in range(0x100)}
+
def collect_strings(properties):
    """Return the sorted, de-duplicated munged names of all properties/values.

    Properties lacking 'names' or 'values' are skipped; names gathered
    before the missing key is hit are deliberately kept (EAFP).
    """
    strings = []

    for property in properties.values():
        try:
            strings += property['names']

            for value in property['values'].values():
                strings += value['names']
        except KeyError:
            pass

    # De-duplicate *after* munging: distinct raw names may collapse.
    return sorted({munge(string) for string in strings})
+
def chunked(iterable, chunk_size):
    """Yield consecutive slices of length *chunk_size* from a sequence.

    The final slice may be shorter. *iterable* must support len() and
    slicing.
    """
    sequence = iterable
    size = len(sequence)

    for offset in range(0, size, chunk_size):
        yield sequence[offset : offset + chunk_size]
+
def determine_entry_type(iterable):
    """Return the narrowest unsigned C type able to hold every value.

    Raises ValueError for negative values or values above 0xFFFF.
    """
    lower, upper = min(iterable), max(iterable)

    # Guard clause: unsigned types cannot represent negatives.
    if lower < 0:
        raise ValueError('cannot determine C type for {}..{}'.format(lower,
          upper))

    if upper <= 0xFF:
        return 'RE_UINT8'
    if upper <= 0xFFFF:
        return 'RE_UINT16'

    raise ValueError('cannot determine C type for {}..{}'.format(lower, upper))
+
def is_binary(property):
    """Return True if *property* is binary-valued.

    A property counts as binary when exactly one of its distinct values
    has a non-zero ID, i.e. a single 'set' value besides the default
    (whose ID is 0).

    Bug fix: the original body referenced an undefined global
    ``val_list`` (raising NameError on any call) and ignored the
    *property* argument entirely; the value list is now derived from
    *property* itself.
    """
    # De-duplicate by object identity: several alias names map to the
    # same underlying value dict.
    val_list = {id(value): value for value in
      property['values'].values()}.values()

    return sum(1 for val in val_list if val['id'] != 0) == 1
+
def count_ranges(property):
    """Count the codepoint ranges of all non-default values of *property*."""
    default_id = property['values'][munge(property['default'])]['id']

    # De-duplicate the value records by object identity (alias names
    # share one dict).
    distinct = {}

    for value in property['values'].values():
        distinct.setdefault(id(value), value)

    return sum(len(value.get('codepoints', [])) for value in
      distinct.values() if value['id'] != default_id)
+
def generate_small_lookup(property, c_file):
    """Emit a C accessor for *property* as straight-line comparisons.

    Used (by generate_lookup) when the property has so few non-default
    ranges that direct compare-and-return beats lookup tables.
    """
    c_file.write('''
/* {}. */
RE_UINT32 re_get_{}(RE_UINT32 codepoint) {{
'''.format(property['names'][0], property['names'][0].lower()))

    default_id = property['values'][munge(property['default'])]['id']
    ranges = []

    # Collect every (lower, upper, value-id) triple with a non-default
    # value; alias names share value dicts, hence the identity dedup.
    for value in unique(property['values'].values(), key=id):
        if value['id'] != default_id:
            val_id = value['id']

            for lower, upper in value.get('codepoints', []):
                ranges.append((lower, upper, val_id))

    if len(ranges) == 1 and ranges[0][ : 2] == (0, NUM_CODEPOINTS - 1):
        # One value covers the whole codepoint space: constant function.
        c_file.write('    return {};\n}}\n'.format(ranges[0][2]))
    else:
        for lower, upper, val_id in ranges:
            # Hex literal width: 2, 4 or 6 digits depending on magnitude.
            width = 2 if upper <= 0xFF else 4 if upper <= 0xFFFF else 6

            if lower == upper:
                c_file.write('''\
    if (codepoint == 0x{:0{width}X})
        return {};
'''.format(lower, val_id, width=width))
            else:
                c_file.write('''\
    if (0x{:0{width}X} <= codepoint && codepoint <= 0x{:0{width}X})
        return {};
'''.format(lower, upper, val_id, width=width))

        c_file.write('\n    return {};\n}}\n'.format(default_id))
+
def generate_table(table_name, values, c_file, max_columns=16, public=False):
    """Write *values* to *c_file* as a C array named *table_name*.

    The element type is the narrowest unsigned type that fits; the
    array is 'static' unless *public* is true. Entries are emitted
    *max_columns* per line, right-aligned.
    """
    entry_type = determine_entry_type(values)
    storage = '' if public else 'static '

    c_file.write('{}{} {}[] = {{\n'.format(storage, entry_type, table_name))

    cells = [str(value) for value in values]
    width = max(len(cell) for cell in cells)
    cells = [cell.rjust(width) + ',' for cell in cells]
    # No trailing comma after the final entry.
    cells[-1] = cells[-1].rstrip(',')

    for row in chunked(cells, max_columns):
        c_file.write('    {}\n'.format(' '.join(row)))

    c_file.write('};\n')
+
def generate_lookup(property, c_file):
    """Emit C lookup tables and an accessor function for *property*.

    The per-codepoint value array is compressed into a 3-level trie
    (tables 1..3 in the generated C); boolean-valued tables are further
    packed 8 entries per byte. Properties with very few ranges fall
    back to generate_small_lookup.
    """
    val_list = list(unique(property['values'].values(), key=id))

    if count_ranges(property) <= 8:
        # Few enough ranges: straight-line comparisons beat tables.
        generate_small_lookup(property, c_file)
        return

    default_id = property['values'][munge(property['default'])]['id']
    entries = [default_id] * NUM_CODEPOINTS

    # Paint each value's ranges over the default.
    for value in val_list:
        val_id = value['id']

        for lower, upper in value.get('codepoints', []):
            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)

    CHUNK_SIZE = 32

    # Level 1 of the compression: split into 32-entry chunks and keep
    # only the distinct chunks, recording an index per chunk.
    indexes = []
    chunks = {}

    for chunk in chunked(tuple(entries), CHUNK_SIZE):
        indexes.append(chunks.setdefault(chunk, len(chunks)))

    table_2 = list(chain(*sorted(chunks, key=chunks.get)))

    # Level 2: compress the index list the same way.
    entries = indexes
    indexes = []
    chunks = {}

    for start in range(0, len(entries), CHUNK_SIZE):
        chunk = tuple(entries[start : start + CHUNK_SIZE])
        indexes.append(chunks.setdefault(chunk, len(chunks)))

    table_1 = list(chain(*sorted(chunks, key=chunks.get)))

    table_0 = indexes

    c_file.write('\n/* {}. */\n'.format(property['names'][0]))

    prop_name = property['names'][0].lower()
    binary = set(table_2) == {0, 1}

    for i, table in enumerate([table_0, table_1, table_2]):
        if i == 2 and binary:
            # NOTE(review): this assignment is redundant — binary is
            # already True on this path.
            binary = True
            entries = []

            # Pack 8 one-bit entries per byte, LSB first; binary_dict
            # is presumably the module-level result of
            # make_binary_dict() — confirm.
            for start in range(0, len(table), 8):
                entries.append(binary_dict[tuple(table[start : start + 8])])

            table = entries

        if i > 0:
            c_file.write('\n')

        generate_table('re_{}_table_{}'.format(prop_name, 1 + i), table,
          c_file)

    if binary:
        # Accessor for the bit-packed form: the low 3 bits of the
        # codepoint select a bit within the fetched byte.
        c_file.write('''
RE_UINT32 re_get_{0}(RE_UINT32 codepoint) {{
    RE_UINT32 field_2;
    RE_UINT32 field_1;
    RE_UINT32 field_0;
    RE_UINT32 offset;
    RE_UINT32 v;

    field_2 = codepoint >> 10;
    field_1 = (codepoint >> 5) & 0x1F;
    field_0 = (codepoint >> 3) & 0x3;
    offset = codepoint & 0x7;

    v = re_{0}_table_1[field_2];
    v = re_{0}_table_2[(v << 5) | field_1];
    v = re_{0}_table_3[(v << 2) | field_0];

    return (v >> offset) & 0x1;
}}
'''.format(prop_name))
    else:
        c_file.write('''
RE_UINT32 re_get_{0}(RE_UINT32 codepoint) {{
    RE_UINT32 field_2;
    RE_UINT32 field_1;
    RE_UINT32 field_0;
    RE_UINT32 v;

    field_2 = codepoint >> 10;
    field_1 = (codepoint >> 5) & 0x1F;
    field_0 = codepoint & 0x1F;

    v = re_{0}_table_1[field_2];
    v = re_{0}_table_2[(v << 5) | field_1];
    v = re_{0}_table_3[(v << 5) | field_0];

    return v;
}}
'''.format(prop_name))
+
def generate_script_extensions_lookup(properties, property, c_file):
    """Emit the C tables and accessor for the Script_Extensions property.

    Codepoints with a single script keep their plain Script value ID;
    multi-script codepoints get IDs >= script_count that index (via
    table 4) into zero-terminated script-ID lists in table 5.
    """
    entries = [0] * NUM_CODEPOINTS

    # Initialise with script.
    val_list = unique(properties[munge('Script')]['values'].values(), key=id)

    for value in val_list:
        val_id = value['id']

        for lower, upper in value.get('codepoints', []):
            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)

    script_count = 1 + max(value['id'] for value in
      properties[munge('Script')]['values'].values())

    # Overlay the Script_Extensions value IDs (these start at
    # script_count — see the ID assignment in generate_code).
    val_list = unique(property['values'].values(), key=id)

    for value in val_list:
        val_id = value['id']

        for lower, upper in value.get('codepoints', []):
            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)

    CHUNK_SIZE = 32

    # Two-stage trie compression, as in generate_lookup.
    indexes = []
    chunks = {}

    for chunk in chunked(entries, CHUNK_SIZE):
        indexes.append(chunks.setdefault(tuple(chunk), len(chunks)))

    table_2 = list(chain(*sorted(chunks, key=chunks.get)))

    entries = indexes
    indexes = []
    chunks = {}

    for start in range(0, len(entries), CHUNK_SIZE):
        chunk = tuple(entries[start : start + CHUNK_SIZE])
        indexes.append(chunks.setdefault(chunk, len(chunks)))

    table_1 = list(chain(*sorted(chunks, key=chunks.get)))

    table_0 = indexes

    c_file.write('\n/* {}. */\n'.format(property['names'][0]))

    prop_name = property['names'][0].lower()

    for i, table in enumerate([table_0, table_1, table_2]):
        generate_table('{}_table_{}'.format(prop_name, 1 + i), table, c_file)

    # Map each extension value ID to its list of plain Script IDs.
    script_values = properties[munge('Script')]['values']
    ext_dict = {}

    for key, value in property['values'].items():
        ext_dict[value['id']] = [script_values[munge(name)]['id'] for name in
          key]

    # table 4: offset of each list within table 5; table 5: the lists,
    # each terminated by 0.
    offsets = []
    entries = []

    for key, value in sorted(ext_dict.items()):
        offsets.append(len(entries))
        entries.extend(value + [0])

    generate_table('{}_table_4'.format(prop_name), offsets, c_file)

    generate_table('{}_table_5'.format(prop_name), entries, c_file)

    c_file.write('''
int re_get_{0}(RE_UINT32 codepoint, RE_UINT8* scripts) {{
    RE_UINT32 field_2;
    RE_UINT32 field_1;
    RE_UINT32 field_0;
    RE_UINT32 v;
    int offset;
    int count;

    field_2 = codepoint >> 10;
    field_1 = (codepoint >> 5) & 0x1F;
    field_0 = codepoint & 0x1F;

    v = {0}_table_1[field_2];
    v = {0}_table_2[(v << 5) | field_1];
    v = {0}_table_3[(v << 5) | field_0];

    if (v < {1}) {{
        scripts[0] = v;

        return 1;
    }}

    offset = {0}_table_4[v - {1}];
    count = 0;

    do {{
        scripts[count] = {0}_table_5[offset + count];
        ++count;
    }} while ({0}_table_5[offset + count] != 0);

    return count;
}}
'''.format(prop_name, script_count))
+
def generate_all_cases(unicode_data, c_file):
    """Emit the C tables and accessor returning all case variants.

    Builds case-equivalence classes from the simple-folding deltas
    (deltas are XOR masks), closes them transitively, special-cases the
    Turkish dotted/dotless I, then compresses the per-codepoint class
    IDs into the usual 3-level trie plus a row table (table 4).
    """
    simple_folding = unicode_data['simple_folding']

    all_cases = {}

    for delta, ranges in simple_folding.items():
        for lower, upper in ranges:
            for codepoint in range(lower, upper + 1):
                # The folding delta is applied by XOR.
                folded = codepoint ^ delta
                all_cases.setdefault(folded, set()).update({codepoint, folded})

    # Transitive closure: every member of a class maps to the full class.
    for codepoint in list(all_cases):
        cases = {codepoint} | all_cases.get(codepoint, set())

        for c in list(cases):
            cases |= all_cases.get(c, set())

        for c in cases:
            all_cases[c] = cases

    all_cases[0x49] = {0x49, 0x69, 0x131} # Dotless capital I.
    all_cases[0x69] = {0x69, 0x49, 0x130} # Dotted small I.
    all_cases[0x130] = {0x130, 0x69} # Dotted capital I.
    all_cases[0x131] = {0x131, 0x49} # Dotless small I.

    entries = [0] * NUM_CODEPOINTS
    others_dict = {(0, ): 0}

    # Row key: XOR delta to the first other case, then the remaining
    # cases as literal codepoints.
    for codepoint, cases in all_cases.items():
        others = sorted(cases - {codepoint})
        key = tuple([others[0] ^ codepoint] + others[1 : ])
        entries[codepoint] = others_dict.setdefault(key, len(others_dict))

    CHUNK_SIZE = 32

    # Two-stage trie compression, as in generate_lookup.
    indexes = []
    chunks = {}

    for chunk in chunked(entries, CHUNK_SIZE):
        indexes.append(chunks.setdefault(tuple(chunk), len(chunks)))

    table_2 = list(chain(*sorted(chunks, key=chunks.get)))

    entries = indexes
    indexes = []
    chunks = {}

    for start in range(0, len(entries), CHUNK_SIZE):
        chunk = tuple(entries[start : start + CHUNK_SIZE])
        indexes.append(chunks.setdefault(chunk, len(chunks)))

    table_1 = list(chain(*sorted(chunks, key=chunks.get)))

    table_0 = indexes

    c_file.write('\n/* All cases. */\n')

    for i, table in enumerate([table_0, table_1, table_2]):
        if i > 0:
            c_file.write('\n')

        generate_table('re_all_cases_table_{}'.format(1 + i), table, c_file)

    c_file.write('\nstatic RE_AllCases re_all_cases_table_4[] = {\n')

    max_columns = max(len(value) for value in others_dict)

    # Rows are zero-padded to the widest; fields right-aligned.
    max_width = max(len(str(item)) for value in others_dict for item in value)
    fmt = '    {{{:%d}, {{' % max_width + ', '.join(['{:%d}' % max_width] *
      (max_columns -1)) + '}}}},\n'

    lines = []

    for values in sorted(others_dict, key=others_dict.get):
        values = list(values) + [0] * max_columns
        lines.append(fmt.format(*values))

    lines[-1] = lines[-1].rstrip(',\n') + '\n'

    c_file.writelines(lines)

    c_file.write('};\n')

    c_file.write('''
int re_get_all_cases(RE_UINT32 codepoint, RE_UINT32* cases) {
    RE_UINT32 field_2;
    RE_UINT32 field_1;
    RE_UINT32 field_0;
    RE_UINT32 v;

    field_2 = codepoint >> 10;
    field_1 = (codepoint >> 5) & 0x1F;
    field_0 = codepoint & 0x1F;

    v = re_all_cases_table_1[field_2];
    v = re_all_cases_table_2[(v << 5) | field_1];
    v = re_all_cases_table_3[(v << 5) | field_0];

    cases[0] = codepoint;

    if (re_all_cases_table_4[v].delta == 0)
        return 1;

    cases[1] = codepoint ^ re_all_cases_table_4[v].delta;

    if (re_all_cases_table_4[v].others[0] == 0)
        return 2;

    cases[2] = re_all_cases_table_4[v].others[0];

    if (re_all_cases_table_4[v].others[1] == 0)
        return 3;

    cases[3] = re_all_cases_table_4[v].others[1];

    return 4;
}
''')
+
def generate_simple_case_folding(unicode_data, c_file):
    """Emit the C tables and accessor for simple (1:1) case folding.

    Per-codepoint XOR deltas are mapped to small IDs, compressed into
    the 3-level trie, and the deltas themselves go into table 4; the
    accessor applies the delta by XOR.
    """
    simple_folding = unicode_data['simple_folding']

    entries = [0] * NUM_CODEPOINTS
    value_dict = {0: 0}

    for delta, ranges in sorted(simple_folding.items()):
        val_id = value_dict.setdefault(delta, len(value_dict))

        for lower, upper in ranges:
            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)

    CHUNK_SIZE = 32

    # Two-stage trie compression, as in generate_lookup.
    indexes = []
    chunks = {}

    for chunk in chunked(entries, CHUNK_SIZE):
        indexes.append(chunks.setdefault(tuple(chunk), len(chunks)))

    table_2 = list(chain(*sorted(chunks, key=chunks.get)))

    entries = indexes
    indexes = []
    chunks = {}

    for start in range(0, len(entries), CHUNK_SIZE):
        chunk = tuple(entries[start : start + CHUNK_SIZE])
        indexes.append(chunks.setdefault(chunk, len(chunks)))

    table_1 = list(chain(*sorted(chunks, key=chunks.get)))

    table_0 = indexes

    c_file.write('\n/* Simple case folding. */\n')

    for i, table in enumerate([table_0, table_1, table_2]):
        if i > 0:
            c_file.write('\n')

        generate_table('re_simple_folding_table_{}'.format(1 + i), table, c_file)

    c_file.write('\nstatic RE_UINT16 re_simple_folding_table_4[] = {\n')

    # Table 4 holds the XOR deltas, in ID order.
    entries = [str(value) for value in sorted(value_dict, key=value_dict.get)]
    max_width = max(len(entry) for entry in entries)
    entries = [entry.rjust(max_width) + ',' for  entry in entries]
    entries[-1] = entries[-1].rstrip(',')

    for chunk in chunked(entries, 8):
        c_file.write('    %s\n' % ' '.join(chunk))

    c_file.write('};\n')

    c_file.write('''
RE_UINT32 re_get_simple_case_folding(RE_UINT32 codepoint) {
    RE_UINT32 field_2;
    RE_UINT32 field_1;
    RE_UINT32 field_0;
    RE_UINT32 v;

    field_2 = codepoint >> 10;
    field_1 = (codepoint >> 5) & 0x1F;
    field_0 = codepoint & 0x1F;

    v = re_simple_folding_table_1[field_2];
    v = re_simple_folding_table_2[(v << 5) | field_1];
    v = re_simple_folding_table_3[(v << 5) | field_0];

    return codepoint ^ re_simple_folding_table_4[v];
}
''')
+
def generate_full_case_folding(unicode_data, c_file):
    """Emit the C tables and accessor for full (1:n) case folding.

    Each folding is a tuple whose first element is an XOR delta for the
    first folded codepoint and whose remaining elements are literal
    codepoints. Tuples map to IDs, compressed into the 3-level trie;
    the zero-padded tuples become rows of table 4.
    """
    full_folding = unicode_data['full_folding']

    entries = [0] * NUM_CODEPOINTS
    value_dict = {(0, ): 0}

    for delta, ranges in sorted(full_folding.items()):
        val_id = value_dict.setdefault(delta, len(value_dict))

        for lower, upper in ranges:
            entries[lower : upper + 1] = [val_id] * (upper - lower + 1)

    CHUNK_SIZE = 32

    # Two-stage trie compression, as in generate_lookup.
    indexes = []
    chunks = {}

    for chunk in chunked(entries, CHUNK_SIZE):
        indexes.append(chunks.setdefault(tuple(chunk), len(chunks)))

    table_2 = list(chain(*sorted(chunks, key=chunks.get)))

    entries = indexes
    indexes = []
    chunks = {}

    for start in range(0, len(entries), CHUNK_SIZE):
        chunk = tuple(entries[start : start + CHUNK_SIZE])
        indexes.append(chunks.setdefault(chunk, len(chunks)))

    table_1 = list(chain(*sorted(chunks, key=chunks.get)))

    table_0 = indexes

    c_file.write('\n/* Full case folding. */\n')

    for i, table in enumerate([table_0, table_1, table_2]):
        if i > 0:
            c_file.write('\n')

        generate_table('re_full_folding_table_{}'.format(1 + i), table, c_file)

    c_file.write('\nstatic RE_FullCaseFolding re_full_folding_table_4[] = {\n')

    # Pad every folding tuple with zeros to the maximum length.
    max_folded = max(len(value) for value in value_dict)
    max_width = max(len(str(item)) for value in value_dict for item in value)
    rows = [(value + (0, ) * max_folded)[ : max_folded] for value in
      sorted(value_dict, key=value_dict.get)]
    fmt = ('    {{{{' + ', '.join(['{:%d}' % max_width] * max_folded) +
      '}}}},\n').format
    lines = []

    for row in rows:
        lines.append(fmt(*row))

    lines[-1] = lines[-1].rstrip(',\n') + '\n'

    c_file.writelines(lines)

    c_file.write('};\n')

    c_file.write('''
int re_get_full_case_folding(RE_UINT32 codepoint, RE_UINT32* folded) {
    RE_UINT32 field_2;
    RE_UINT32 field_1;
    RE_UINT32 field_0;
    RE_UINT32 v;
    RE_UINT16* data;

    field_2 = codepoint >> 10;
    field_1 = (codepoint >> 5) & 0x1F;
    field_0 = codepoint & 0x1F;

    v = re_full_folding_table_1[field_2];
    v = re_full_folding_table_2[(v << 5) | field_1];
    v = re_full_folding_table_3[(v << 5) | field_0];

    data = re_full_folding_table_4[v].data;
    folded[0] = codepoint ^ data[0];

    if (data[1] == 0)
        return 1;

    folded[1] = data[1];

    if (data[2] == 0)
        return 2;

    folded[2] = data[2];

    return 3;
}
''')
+
def generate_code(unicode_data, tools_folder):
    """Generate 'unicode.c' and 'unicode.h' from the parsed Unicode data.

    Assigns numeric IDs to properties and values, emits the string /
    property / value tables and per-property lookup functions into the
    C file, and the matching declarations and #defines into the header.
    Also records several table sizes back into *unicode_data*.
    """
    print('Generating code')

    # Codepoints that expand on full casefolding.
    expanded = []

    for key, ranges in unicode_data['full_folding'].items():
        if len(key) > 1:
            for lower, upper in ranges:
                expanded.extend(range(lower, upper + 1))

    expanded.sort()

    # Assign the property and value IDs.
    properties = unicode_data['properties']
    prop_list = list(unique(properties.values(), key=id))
    prop_list.sort(key=preferred)

    # NOTE(review): 'property_tablee_count' looks like a typo of
    # 'property_table_count', but the key is used consistently below
    # (extern declarations) — renaming needs both sites, and possibly
    # readers outside this file, changed together.
    unicode_data['property_tablee_count'] = len(properties)
    unicode_data['property_count'] = len(prop_list)

    # Munged alias names of boolean/tristate property values.
    no_yes_maybe = {
        'NO', 'N', 'FALSE', 'F',
        'YES', 'Y', 'TRUE', 'T',
        'MAYBE', 'M',
    }

    yes_no_maybe_dict = {'No': 0, 'Yes': 1, 'Maybe': 2}

    for prop_id, property in enumerate(prop_list):
        property['id'] = prop_id

        if property['names'][0] == 'Script_Extensions':
            # Script_Extensions value IDs start after the plain Script
            # IDs (see generate_script_extensions_lookup).
            script_count = 1 + max(val['id'] for val in
              properties[munge('Script')]['values'].values())

            def make_key(value):
                return value['codepoints'].lowest()

            val_list = list(unique(property['values'].values(), key=id))
            val_list.sort(key=make_key)

            for val_id, value in enumerate(val_list):
                value['id'] = script_count + val_id
        else:
            default = property['default']

            # A property whose value names are all yes/no/maybe aliases
            # gets the fixed No=0, Yes=1, Maybe=2 ordering.
            if not (set(property['values']) - no_yes_maybe):

                def make_key(value):
                    return yes_no_maybe_dict[value['names'][0]]

            else:

                # Default value first, then by lowest codepoint, then
                # values with no codepoints at all.
                def make_key(value):
                    if munge(value['names'][0]) == default:
                        return (0, )

                    if 'codepoints' not in value:
                        return (2, )

                    return 1, value['codepoints'].lowest()

            val_list = list(unique(property['values'].values(), key=id))
            val_list.sort(key=make_key)

            # NOTE(review): this make_key is never used — it is either
            # shadowed by the General_Category variant below or ignored
            # by the plain enumerate() branch.
            def make_key(val):
                name_list = [name for name in val['names'] if '&' in name]

                if name_list:
                    return 1, name_list[0][0]

                return 0

            if property['names'][0] == 'General_Category':

                # Real categories first, then the '&'-grouped ones,
                # then the remaining codepoint-less values by name.
                def make_key(value):
                    for name in value['names']:
                        if '&' in name:
                            return (1, name)

                    if value.get('codepoints'):
                        return (0, )

                    return (2, munge(value['names'][0]))

                for val_id, value in enumerate(sorted(val_list, key=make_key)):
                    value['id'] = val_id
            else:
                for val_id, value in enumerate(val_list):
                    value['id'] = val_id

    # Collect the value sets.
    valueset_dict = {}

    for property in sorted(prop_list, key=lambda prop: prop['id']):
        prop_name = property['names'][0]

        if prop_name == 'Script_Extensions':
            # Script_Extensions shares Script's value set.
            property['valueset_id'] = properties[munge('Script')]['valueset_id']
        else:
            valueset = []

            val_list = list(unique(property['values'].values(), key=id))

            for value in sorted(val_list, key=lambda val: val['id']):
                valueset.append(tuple(value['names']))

            valueset_id = valueset_dict.setdefault(tuple(valueset),
              len(valueset_dict))
            property['valueset_id'] = valueset_id

    strings = collect_strings(properties)

    c_path = join(tools_folder, 'unicode.c')
    h_path = join(tools_folder, 'unicode.h')

    with open(c_path, 'w', newline='\n', encoding='ascii') as c_file:
        c_file.write('''\
/* For Unicode version {} */

#include "_regex_unicode.h"

#define RE_BLANK_MASK ((1 << RE_PROP_ZL) | (1 << RE_PROP_ZP))
#define RE_GRAPH_MASK ((1 << RE_PROP_CC) | (1 << RE_PROP_CS) | (1 << RE_PROP_CN))
#define RE_WORD_MASK (RE_PROP_M_MASK | (1 << RE_PROP_ND) | (1 << RE_PROP_PC))

typedef struct {{
    RE_UINT8 scripts[RE_MAX_SCX];
}} RE_ScriptExt;

typedef struct {{
    RE_UINT32 delta;
    RE_UINT16 others[RE_MAX_CASES - 1];
}} RE_AllCases;

typedef struct {{
    RE_UINT16 data[RE_MAX_FOLDED];
}} RE_FullCaseFolding;

/* Strings. */
char* re_strings[] = {{
'''.format(UNICODE_VERSION))

        lines = []

        for string in strings:
            lines.append('    "{}",\n'.format(string))

        # Maps each munged string to its index in re_strings.
        strings_dict = {string: i for i, string in enumerate(strings)}

        unicode_data['string_count'] = len(strings_dict)

        c_file.writelines(lines)
        c_file.write('''\
};

/* Properties. */
RE_Property re_properties[] = {
''')

        # One table row per property alias name.
        for prop_id, property in enumerate(sorted(prop_list, key=lambda prop:
          prop['id'])):
            for name in property['names']:
                c_file.write('    {{{:4}, {:2}, {:2}}}, /* {} */\n'.format(strings_dict[munge(name)],
                  prop_id, property['valueset_id'], munge(name)))

        c_file.write('''\
};

/* Property values. */
RE_PropertyValue re_property_values[] = {
''')

        # For General_Category, names with a 2-letter alias sort first.
        def make_key(names):
            if any(len(name) == 2 for name in names):
                return 0

            return 1

        gc_valset_id = properties[munge('General_Category')]['valueset_id']
        count = 0

        for valset, valset_id in sorted(valueset_dict.items(), key=lambda pair:
          pair[1]):
            if valset_id == gc_valset_id:
                valset = sorted(valset, key=make_key)

            for val_id, names in enumerate(valset):
                for name in names:
                    c_file.write('''    {{{:4}, {:2}, {:3}}}, /* {} */\n'''.format(strings_dict[munge(name)],
                      valset_id, val_id, munge(name)))

                count += len(names)

        unicode_data['valueset_table_count'] = count

        c_file.write('};\n')

        c_file.write('''\n/* Codepoints which expand on full case-folding. */\n''')

        unicode_data['expanded_count'] = len(expanded)
        generate_table('re_expand_on_folding', expanded, c_file, max_columns=8, public=True)

        for property in prop_list:
            print('    {}'.format(property['names'][0]), flush=True)

            if property['names'][0] == 'Script_Extensions':
                generate_script_extensions_lookup(properties, property, c_file)
            else:
                generate_lookup(property, c_file)

        print('    All cases', flush=True)
        generate_all_cases(unicode_data, c_file)

        print('    Simple case folding', flush=True)
        generate_simple_case_folding(unicode_data, c_file)

        print('    Full case folding', flush=True)
        generate_full_case_folding(unicode_data, c_file)

        c_file.write('''
/* Property function table. */
RE_GetPropertyFunc re_get_property[] = {
''')

        lines = []

        # Script_Extensions has a different signature, so its slot is 0.
        for property in prop_list:
            prop_name = property['names'][0].lower()

            if prop_name == 'script_extensions':
                lines.append('    0,\n')
            else:
                lines.append('    re_get_{},\n'.format(prop_name))

        lines[-1] = lines[-1].rstrip(',\n') + '\n'

        c_file.writelines(lines)

        c_file.write('};\n')

    with open(h_path, 'w', newline='\n', encoding='ascii') as h_file:
        property = unicode_data['properties'][munge('Script_Extensions')]
        # Longest script list of any Script_Extensions value.
        max_scx = max(len(key) for key in property['values'])

        h_file.write('''\
typedef unsigned char RE_UINT8;
typedef signed char RE_INT8;
typedef unsigned short RE_UINT16;
typedef signed short RE_INT16;
typedef unsigned int RE_UINT32;
typedef signed int RE_INT32;

typedef unsigned char BOOL;
#if !defined(FALSE) || !defined(TRUE)
#define FALSE 0
#define TRUE 1
#endif

#define RE_ASCII_MAX 0x7F
#define RE_LOCALE_MAX 0xFF

#define RE_MAX_CASES 4
#define RE_MAX_FOLDED 3
#define RE_MAX_SCX {}

typedef struct RE_Property {{
    RE_UINT16 name;
    RE_UINT8 id;
    RE_UINT8 value_set;
}} RE_Property;

typedef struct RE_PropertyValue {{
    RE_UINT16 name;
    RE_UINT8 value_set;
    RE_UINT16 id;
}} RE_PropertyValue;

typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);
'''.format(max_scx))

        gc_id = properties[munge('General_Category')]['id']
        cased_id = properties[munge('Cased')]['id']
        upper_id = properties[munge('Uppercase')]['id']
        lower_id = properties[munge('Lowercase')]['id']
        scx_id = properties[munge('Script_Extensions')]['id']

        h_file.write('''
#define RE_PROP_GC 0x{:X}
#define RE_PROP_CASED 0x{:X}
#define RE_PROP_UPPERCASE 0x{:X}
#define RE_PROP_LOWERCASE 0x{:X}
#define RE_PROP_SCX 0x{:X}

'''.format(gc_id, cased_id, upper_id, lower_id, scx_id))

        # #defines for the General_Category group values.
        gc_values = properties[munge('General_Category')]['values']
        group_names = set('C L M N P S Z Assigned Cased_Letter'.split())

        names = set(gc_values) & set(munge(name) for name in group_names)

        for name in sorted(names, key=lambda name: gc_values[name]['id']):
            h_file.write('#define RE_PROP_{} {}\n'.format(name,
              gc_values[name]['id']))

        h_file.write('\n')

        # #defines and per-group bitmasks for the 2-letter categories.
        val_list = []
        masks = {}

        for name in gc_values:
            if len(name) != 2 or not name.isalpha():
                continue

            if not gc_values[name].get('codepoints'):
                continue

            val_id = gc_values[name]['id']
            val_list.append((val_id, name))
            masks.setdefault(name[0], 0)
            masks[name[0]] |= 1 << val_id

        for val_id, name in sorted(val_list):
            h_file.write('#define RE_PROP_{} {}\n'.format(name, val_id))

        h_file.write('\n')

        for name, mask in sorted(masks.items()):
            h_file.write('#define RE_PROP_{}_MASK 0x{:08X}\n'.format(name,
              mask))

        h_file.write('\n')

        # Common POSIX-style names: encoded as (property id << 16) | value id,
        # with 1 as the value for binary properties.
        common = '''
            Alnum Alpha Any Ascii Blank Cntrl Digit Graph Lower Print Space
            Upper Word Xdigit Posix_Alnum Posix_Digit Posix_Punct Posix_Xdigit
        '''

        for name in common.split():
            property = properties.get(munge(name))

            if property is not None:
                h_file.write('#define RE_PROP_{} 0x{:06X}\n'.format(name.upper(),
                  (property['id'] << 16) | 1))
            else:
                # Fall back to a value of GC, Script or Block.
                for prop_name in ['GC', 'Script', 'Block']:
                    property = properties[munge(prop_name)]
                    value = property['values'].get(munge(name))

                    if value is not None:
                        h_file.write('#define RE_PROP_{} 0x{:06X}\n'.format(name.upper(),
                          (property['id'] << 16) | value['id']))
                        break

        h_file.write('\n')

        val_list = unique(properties[munge('Word_Break')]['values'].values(),
          key=id)
        values = [(value['id'], value['names'][0]) for value in val_list]

        for val_id, name in sorted(values):
            h_file.write('#define RE_WBREAK_{} {}\n'.format(munge(name),
              val_id))

        h_file.write('\n')

        val_list = unique(properties[munge('Grapheme_Cluster_Break')]['values'].values(),
          key=id)
        values = [(value['id'], value['names'][0]) for value in val_list]

        for val_id, name in sorted(values):
            h_file.write('#define RE_GBREAK_{} {}\n'.format(munge(name),
              val_id))

        h_file.write('\n')

        val_list = unique(properties[munge('Line_Break')]['values'].values(),
          key=id)
        values = [(value['id'], value['names'][0]) for value in val_list]

        for val_id, name in sorted(values):
            h_file.write('#define RE_LBREAK_{} {}\n'.format(munge(name),
              val_id))

        h_file.write('\n')

        h_file.write('extern char* re_strings[{}];\n'.format(unicode_data['string_count']))
        h_file.write('extern RE_Property re_properties[{}];\n'.format(unicode_data['property_tablee_count']))
        h_file.write('extern RE_PropertyValue re_property_values[{}];\n'.format(unicode_data['valueset_table_count']))
        h_file.write('extern RE_UINT16 re_expand_on_folding[{}];\n'.format(unicode_data['expanded_count']))
        h_file.write('extern RE_GetPropertyFunc re_get_property[{}];\n'.format(unicode_data['property_count']))

        h_file.write('\n')

        for property in prop_list:
            prop_name = property['names'][0]

            if prop_name == 'Script_Extensions':
                h_file.write('int re_get_{}(RE_UINT32 codepoint, RE_UINT8* scripts);\n'.format(prop_name.lower()))
            else:
                h_file.write('RE_UINT32 re_get_{}(RE_UINT32 codepoint);\n'.format(prop_name.lower()))

        h_file.write('int re_get_all_cases(RE_UINT32 codepoint, RE_UINT32* cases);\n')
        h_file.write('RE_UINT32 re_get_simple_case_folding(RE_UINT32 codepoint);\n')
        h_file.write('int re_get_full_case_folding(RE_UINT32 codepoint, RE_UINT32* folded);\n')
+
# The version of the Unicode Character Database to process.
# (The original comment here — "Whether to update the Unicode data files
# from the Unicode website." — described a boolean flag, not this string.)
UNICODE_VERSION = '14.0.0'

# The URL from which the Unicode data files can be obtained.
unicode_data_base = 'http://www.unicode.org/Public/UNIDATA/'

# Number of Unicode codepoints: U+0000..U+10FFFF.
NUM_CODEPOINTS = 0x110000

# The Unicode data files. The file names are relative to the website URL.
# '[section]' headers group the files; the groups are parsed below into
# the 'data_files' dict.
unicode_data_files = '''
[aliases]
PropertyAliases.txt
PropertyValueAliases.txt
[binary]
PropList.txt
extracted/DerivedBinaryProperties.txt
DerivedCoreProperties.txt
[emoji]
emoji/emoji-data.txt
[normalisation]
DerivedNormalizationProps.txt
[multivalue]
auxiliary/GraphemeBreakProperty.txt
auxiliary/SentenceBreakProperty.txt
auxiliary/WordBreakProperty.txt
Blocks.txt
extracted/DerivedBidiClass.txt
extracted/DerivedCombiningClass.txt
extracted/DerivedDecompositionType.txt
extracted/DerivedEastAsianWidth.txt
extracted/DerivedGeneralCategory.txt
extracted/DerivedJoiningGroup.txt
extracted/DerivedJoiningType.txt
extracted/DerivedLineBreak.txt
extracted/DerivedNumericType.txt
HangulSyllableType.txt
IndicPositionalCategory.txt
IndicSyllabicCategory.txt
Scripts.txt
[numeric_values]
extracted/DerivedNumericValues.txt
[case_folding]
CaseFolding.txt
[script_extensions]
ScriptExtensions.txt
'''

# Parse the listing above into {section_name: [relative_path, ...]}.
data_files = {}
section = ''

for line in unicode_data_files.splitlines():
    if line.startswith('[') and line.endswith(']'):
        # A '[section]' header starts a new group. (Equivalent to the
        # previous "line[:1] + line[-1:] == '[]'" test, but clearer;
        # single-character lines fail both tests.)
        section = line[1 : -1]
    elif line:
        data_files.setdefault(section, []).append(line)
+
# The generated C files will be written into this folder.
tools_folder = dirname(__file__)

# The local folder in which the Unicode data files are stored.
data_folder = join(tools_folder, 'unicode_data')

# Create the data folder if it doesn't already exist.
with suppress(FileExistsError):
    mkdir(data_folder)

# Download the data files listed in 'data_files' into 'data_folder'.
download_unicode_files(unicode_data_base, data_files, data_folder)

# Parse the files, add the derived properties, and write a summary.
unicode_data = parse_unicode_files(data_files, data_folder)
make_additional_properties(unicode_data)
write_summary(unicode_data, tools_folder)

# NOTE(review): 'binary_dict' is bound at module level but never passed to
# generate_code below — presumably read as a global inside it; confirm.
binary_dict = make_binary_dict()

# Emit the generated C sources next to this script.
generate_code(unicode_data, tools_folder)

print('\nFinished!')