Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • janitor-team/proposed/ocaml-re
  • ocaml-team/ocaml-re
2 results
Show changes
Commits on Source (6)
1.9.0 (05-Apr-2019)
-------------------
* Fix regression in `Re.exec_partial` (#164)
* Move gen related functions to `Re.Gen` and deprecate the old names (#167)
* Introduce `Re.View` that exposes the internal representation (#163)
1.8.0 (04-Aug-2018)
-------------------
* Fix index-out-of-bounds exception in Re.Perl.re (#160)
* Add seq based iterators (#170)
1.7.3 (05-Mar-2018)
-------------------
......
Requirements
The installation procedure defined in the Makefile requires findlib
(http://www.ocaml-programming.de/packages/documentation/findlib/).
Installation
- Compile with "make all".
- If you have ocamlopt, do also "make opt".
- Become super-user if necessary and do "make install"
(A "make uninstall" removes the library.)
JBUILDER ?= jbuilder
DUNE ?= dune
all:
@$(JBUILDER) build
@$(DUNE) build
test:
@$(JBUILDER) runtest
@$(DUNE) runtest
check: test
clean:
@$(JBUILDER) clean
@$(DUNE) clean
.PHONY: check test all clean
.PHONY: all-supported-ocaml-versions
all-supported-ocaml-versions:
jbuilder build @runtest --workspace jbuild-workspace.dev
dune build @runtest --workspace dune-workspace.dev
......@@ -18,20 +18,18 @@ Features
The following styles of regular expressions are supported:
- Perl-style regular expressions (module `Re.Perl`);
- Posix extended regular expressions (module `Re_posix`);
- Posix extended regular expressions (module `Re.Posix`);
- Emacs-style regular expressions (module `Re.Emacs`);
- Shell-style file globbing (module `Re_glob`).
- Shell-style file globbing (module `Re.Glob`).
It is also possible to build regular expressions by combining simpler
regular expressions (module `Re`).
It is also possible to build regular expressions by combining simpler regular
expressions (module `Re`).
The most notable missing features are **back-references** and
look-ahead/look-behind **assertions**.
There is also a subset of the PCRE interface available in the
`Re.pcre` library. This makes it easier to port code from that
library to Re without any changes beyond replacing the `pcre`
findlib package with `re.pcre`.
There is also a subset of the PCRE interface available in the `Re.Pcre` module.
This makes it easier to port code from that library to Re with minimal changes.
Performances
============
......
(executable
(libraries re threads core_bench)
(name benchmark))
(jbuild_version 1)
(executable
((libraries (re threads core_bench))
(name benchmark)))
ocaml-re (1.7.3-3) UNRELEASED; urgency=medium
ocaml-re (1.9.0-1) UNRELEASED; urgency=medium
[ Stéphane Glondu ]
* New upstream release
[ Mehdi Dogguy ]
* Minor simplification in install target
-- Mehdi Dogguy <mehdi@debian.org> Sun, 03 Jun 2018 22:56:32 +0200
-- Stéphane Glondu <glondu@debian.org> Sat, 10 Aug 2019 05:28:41 +0200
ocaml-re (1.7.3-2) unstable; urgency=medium
......
......@@ -6,13 +6,13 @@ Uploaders:
Mehdi Dogguy <mehdi@debian.org>
Build-Depends:
debhelper (>= 9),
ocaml-nox (>= 3.12.0~),
ocaml-nox (>= 4.07),
ocaml-best-compilers,
ocaml-findlib,
libounit-ocaml-dev,
jbuilder,
dh-ocaml (>= 0.9~)
Standards-Version: 3.9.8
Standards-Version: 4.4.0
Homepage: https://github.com/ocaml/ocaml-re
Vcs-Browser: https://salsa.debian.org/ocaml-team/ocaml-re
Vcs-Git: https://salsa.debian.org/ocaml-team/ocaml-re.git
......
(library
(name re_str)
(public_name re.str)
(wrapped false)
(modules re_str)
(synopsis "Deprecated. Use Re.Str")
(libraries re))
(library
(name re_pcre)
(public_name re.pcre)
(wrapped false)
(modules re_pcre)
(synopsis "Deprecated. Use Re.Pcre")
(libraries re))
(library
(name re_perl)
(public_name re.perl)
(wrapped false)
(modules re_perl)
(synopsis "Deprecated. Use Re.Perl")
(libraries re))
(library
(name re_posix)
(public_name re.posix)
(wrapped false)
(modules re_posix)
(synopsis "Deprecated. Use Re.Posix")
(libraries re))
(library
(name re_emacs)
(public_name re.emacs)
(wrapped false)
(modules re_emacs)
(synopsis "Deprecated. Use Re.Emacs")
(libraries re))
(library
(name re_glob)
(public_name re.glob)
(wrapped false)
(modules re_glob)
(synopsis "Deprecated. Use Re.Glob")
(libraries re))
(jbuild_version 1)
(library
((name re_str)
(public_name re.str)
(wrapped false)
(modules (re_str))
(synopsis "Deprecated. Use Re.Str")
(libraries (re))))
(library
((name re_pcre)
(public_name re.pcre)
(wrapped false)
(modules (re_pcre))
(synopsis "Deprecated. Use Re.Pcre")
(libraries (re))))
(library
((name re_perl)
(public_name re.perl)
(wrapped false)
(modules (re_perl))
(synopsis "Deprecated. Use Re.Perl")
(libraries (re))))
(library
((name re_posix)
(public_name re.posix)
(wrapped false)
(modules (re_posix))
(synopsis "Deprecated. Use Re.Posix")
(libraries (re))))
(library
((name re_emacs)
(public_name re.emacs)
(wrapped false)
(modules (re_emacs))
(synopsis "Deprecated. Use Re.Emacs")
(libraries (re))))
(library
((name re_glob)
(public_name re.glob)
(wrapped false)
(modules (re_glob))
(synopsis "Deprecated. Use Re.Glob")
(libraries (re))))
(env
(_ (flags (:standard -w -50))))
\ No newline at end of file
(lang dune 1.0)
(name re)
(lang dune 1.0)
;; This file is used by `make all-supported-ocaml-versions`
(context (opam (switch 4.02.3)))
(context (opam (switch 4.03.0)))
(context (opam (switch 4.04.2)))
(context (opam (switch 4.05.0)))
(context (opam (switch 4.06.1)))
(context (opam (switch 4.07.0)))
\ No newline at end of file
;; This file is used by `make all-supported-ocaml-versions`
(context ((switch 4.02.3)))
(context ((switch 4.03.0)))
(context ((switch 4.04.2)))
(context ((switch 4.05.0)))
(context ((switch 4.06.0)))
\ No newline at end of file
(* In reality, this can really be represented as a bool array.
The representation is best thought of as a list of all chars along with a
flag:
(a, 0), (b, 1), (c, 0), (d, 0), ...
characters belonging to the same color are represented by sequences of
characters with the flag set to 0.
*)
type t = Bytes.t
(* Build a fresh map in which no position is marked. It has 257 cells rather
   than 256 because [split] writes at index [j + 1], which is 256 for an
   interval ending at the last character. *)
let make () = Bytes.init 257 (fun _ -> '\000')
(* Turn the boundary marks of [cm] into a color numbering.

   Returns [(colors, reprs, count)]:
   - [colors] has 256 cells; cell [i] holds the color of character code [i];
   - [reprs] has [count] cells; cell [k] holds the highest character code
     that was given color [k];
   - [count] is the number of distinct colors.

   Character 0 always gets color 0; the color is bumped each time a marked
   (non-zero) cell of [cm] is met while walking codes 1 to 255. *)
let flatten cm =
  let colors = Bytes.create 256 in
  let reprs = Bytes.create 256 in
  Bytes.set colors 0 '\000';
  Bytes.set reprs 0 '\000';
  (* Walks codes [code .. 255] with [color] being the color of [code - 1];
     returns the color of the last character. *)
  let rec fill code color =
    if code > 255 then color
    else begin
      let color = if Bytes.get cm code = '\000' then color else color + 1 in
      Bytes.set colors code (Char.chr color);
      (* Later characters of the same color overwrite earlier ones. *)
      Bytes.set reprs color (Char.chr code);
      fill (code + 1) color
    end
  in
  let count = fill 1 0 + 1 in
  (colors, Bytes.sub reprs 0 count, count)
(* For every interval [lo, hi] of the char set [s], flag in [cm] both the
   position where the interval starts and the position right after it ends. *)
let split s cm =
  let mark pos = Bytes.set cm pos '\001' in
  Cset.iter s ~f:(fun lo hi ->
    mark lo;
    mark (hi + 1))
(* Color maps exist to provide an optimization for the regex engine. The fact
that some characters are entirely equivalent for some regexes means that we
can use them interchangeably.
A color map assigns a color to every character in our character set. Any two
characters with the same color will be treated equivalently by the automaton.
*)
(** A color map under construction: a record of which positions have been
    marked as boundaries by {!split}. *)
type t
(** [make ()] returns a fresh map with no position marked. *)
val make : unit -> t
(** [flatten cm] numbers the colors delimited by the marks in [cm] and returns
    [(colors, reprs, count)]: [colors] gives the color of each of the 256
    character codes, [reprs] gives one representative character per color,
    and [count] is the number of colors. *)
val flatten : t -> bytes * bytes * int
(** [split s cm] marks in [cm] the start of every interval of [s] and the
    position just after its end. *)
val split : Cset.t -> t -> unit
This diff is collapsed.
......@@ -28,8 +28,39 @@ type t
type re
(** Compiled regular expression *)
type groups
(** Information about groups in a match. *)
(** Manipulate matching groups. *)
module Group : sig
type t
(** Information about groups in a match. *)
val get : t -> int -> string
(** Raise [Not_found] if the group did not match *)
val offset : t -> int -> int * int
(** Raise [Not_found] if the group did not match *)
val start : t -> int -> int
(** Return the start of the match. Raise [Not_found] if the group did not match. *)
val stop : t -> int -> int
(** Return the end of the match. Raise [Not_found] if the group did not match. *)
val all : t -> string array
(** Return the empty string for each group which did not match *)
val all_offset : t -> (int * int) array
(** Return [(-1,-1)] for each group which did not match *)
val test : t -> int -> bool
(** Test whether a group matched *)
val nb_groups : t -> int
(** Returns the total number of groups defined - matched or not.
This function is experimental. *)
val pp : Format.formatter -> t -> unit
end
type groups = Group.t [@@ocaml.deprecated "Use Group.t"]
(** {2 Compilation and execution of a regular expression} *)
......@@ -40,7 +71,7 @@ val compile : t -> re
val exec :
?pos:int -> (* Default: 0 *)
?len:int -> (* Default: -1 (until end of string) *)
re -> string -> groups
re -> string -> Group.t
(** [exec re str] matches [str] against the compiled expression [re],
and returns the matched groups if any.
@param pos optional beginning of the string (default 0)
......@@ -52,7 +83,7 @@ val exec :
val exec_opt :
?pos:int -> (* Default: 0 *)
?len:int -> (* Default: -1 (until end of string) *)
re -> string -> groups option
re -> string -> Group.t option
(** Similar to {!exec}, but returns an option instead of using an exception. *)
val execp :
......@@ -68,41 +99,6 @@ val exec_partial :
re -> string -> [ `Full | `Partial | `Mismatch ]
(** More detailed version of {!execp} *)
(** Manipulate matching groups. *)
module Group : sig
type t = groups
(** Information about groups in a match. *)
val get : t -> int -> string
(** Raise [Not_found] if the group did not match *)
val offset : t -> int -> int * int
(** Raise [Not_found] if the group did not match *)
val start : t -> int -> int
(** Return the start of the match. Raise [Not_found] if the group did not match. *)
val stop : t -> int -> int
(** Return the end of the match. Raise [Not_found] if the group did not match. *)
val all : t -> string array
(** Return the empty string for each group which did not match *)
val all_offset : t -> (int * int) array
(** Return [(-1,-1)] for each group which did not match *)
val test : t -> int -> bool
(** Test whether a group matched *)
val nb_groups : t -> int
(** Returns the total number of groups defined - matched or not.
This function is experimental. *)
val pp : Format.formatter -> t -> unit
end
(** Marks *)
module Mark : sig
......@@ -124,62 +120,84 @@ end
(** {2 High Level Operations} *)
type split_token =
[ `Text of string (** Text between delimiters *)
| `Delim of Group.t (** Delimiter *)
]
type 'a seq = 'a Seq.t
module Seq : sig
val all :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> Group.t Seq.t
(** Same as {!all} but returns an iterator
@since NEXT_RELEASE *)
val matches :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> string Seq.t
(** Same as {!matches}, but returns an iterator
@since NEXT_RELEASE *)
val split :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> string Seq.t
(** @since NEXT_RELEASE *)
val split_full :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> split_token Seq.t
(** @since NEXT_RELEASE *)
end
val all : ?pos:int -> ?len:int -> re -> string -> Group.t list
(** Repeatedly calls {!exec} on the given string, starting at given position and
length.*)
type 'a gen = unit -> 'a option
val all :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> Group.t list
(** Repeatedly calls {!exec} on the given string, starting at given
position and length.*)
val all_gen : ?pos:int -> ?len:int -> re -> string -> Group.t gen
[@@ocaml.deprecated "Use Seq.all"]
val all_gen :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> Group.t gen
(** Same as {!all} but returns a generator *)
val all_seq : ?pos:int -> ?len:int -> re -> string -> Group.t seq
[@@ocaml.deprecated "Use Seq.all"]
val matches :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> string list
(** Same as {!all}, but extracts the matched substring rather than
returning the whole group. This basically iterates over matched
strings *)
val matches : ?pos:int -> ?len:int -> re -> string -> string list
(** Same as {!all}, but extracts the matched substring rather than returning
the whole group. This basically iterates over matched strings *)
val matches_gen :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> string gen
(** Same as {!matches}, but returns a generator. *)
val matches_gen : ?pos:int -> ?len:int -> re -> string -> string gen
[@@ocaml.deprecated "Use Seq.matches"]
val split :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> string list
(** [split re s] splits [s] into chunks separated by [re]. It yields
the chunks themselves, not the separator. For instance
this can be used with a whitespace-matching re such as ["[\t ]+"]. *)
val matches_seq : ?pos:int -> ?len:int -> re -> string -> string seq
[@@ocaml.deprecated "Use Seq.matches"]
val split_gen :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> string gen
val split : ?pos:int -> ?len:int -> re -> string -> string list
(** [split re s] splits [s] into chunks separated by [re]. It yields the chunks
themselves, not the separator. For instance this can be used with a
whitespace-matching re such as ["[\t ]+"]. *)
type split_token =
[ `Text of string (** Text between delimiters *)
| `Delim of Group.t (** Delimiter *)
]
val split_gen : ?pos:int -> ?len:int -> re -> string -> string gen
[@@ocaml.deprecated "Use Seq.split"]
val split_full :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> split_token list
val split_seq : ?pos:int -> ?len:int -> re -> string -> string seq
[@@ocaml.deprecated "Use Seq.split"]
val split_full_gen :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> split_token gen
val split_full : ?pos:int -> ?len:int -> re -> string -> split_token list
(** [split_full re s] splits [s] into chunks separated by [re]. It yields the chunks
along with the separators. For instance this can be used with a
whitespace-matching re such as ["[\t ]+"]. *)
val split_full_gen : ?pos:int -> ?len:int -> re -> string -> split_token gen
[@@ocaml.deprecated "Use Seq.split_full"]
val split_full_seq : ?pos:int -> ?len:int -> re -> string -> split_token seq
[@@ocaml.deprecated "Use Seq.split_full"]
val replace :
?pos:int -> (** Default: 0 *)
......@@ -372,6 +390,31 @@ val pp_re : Format.formatter -> re -> unit
(** Alias for {!pp_re}. Deprecated *)
val print_re : Format.formatter -> re -> unit
module View : sig
type outer
(** A view of the top-level of a regex. This type is unstable and may change *)
type t =
Set of Cset.t
| Sequence of outer list
| Alternative of outer list
| Repeat of outer * int * int option
| Beg_of_line | End_of_line
| Beg_of_word | End_of_word | Not_bound
| Beg_of_str | End_of_str
| Last_end_of_line | Start | Stop
| Sem of Automata.sem * outer
| Sem_greedy of Automata.rep_kind * outer
| Group of outer | No_group of outer | Nest of outer
| Case of outer | No_case of outer
| Intersection of outer list
| Complement of outer list
| Difference of outer * outer
| Pmark of Pmark.t * outer
val view : outer -> t
end with type outer := t
(** {2 Experimental functions}. *)
val witness : t -> string
......@@ -381,28 +424,37 @@ val witness : t -> string
(** {2 Deprecated functions} *)
type substrings = Group.t
[@@ocaml.deprecated "Use Group.t"]
(** Alias for {!Group.t}. Deprecated *)
val get : Group.t -> int -> string
[@@ocaml.deprecated "Use Group.get"]
(** Same as {!Group.get}. Deprecated *)
val get_ofs : Group.t -> int -> int * int
[@@ocaml.deprecated "Use Group.offset"]
(** Same as {!Group.offset}. Deprecated *)
val get_all : Group.t -> string array
[@@ocaml.deprecated "Use Group.all"]
(** Same as {!Group.all}. Deprecated *)
val get_all_ofs : Group.t -> (int * int) array
[@@ocaml.deprecated "Use Group.all_offset"]
(** Same as {!Group.all_offset}. Deprecated *)
val test : Group.t -> int -> bool
[@@ocaml.deprecated "Use Group.test"]
(** Same as {!Group.test}. Deprecated *)
type markid = Mark.t
[@@ocaml.deprecated "Use Mark.t"]
(** Alias for {!Mark.t}. Deprecated: the deprecation alert names the
    replacement type, [Mark.t]. *)
val marked : Group.t -> Mark.t -> bool
[@@ocaml.deprecated "Use Mark.test"]
(** Same as {!Mark.test}. Deprecated *)
val mark_set : Group.t -> Mark.Set.t
[@@ocaml.deprecated "Use Mark.all"]
(** Same as {!Mark.all}. Deprecated *)
(library
(name re)
(synopsis "Pure OCaml regular expression library")
(libraries seq)
(public_name re))
......@@ -164,12 +164,12 @@ module State = struct
end
let one ~explicit_slash ~explicit_period =
Re.(compl (
Re.compl (
List.concat [
if explicit_slash then [char '/'] else [];
if explicit_period then [char '.'] else [];
if explicit_slash then [Re.char '/'] else [];
if explicit_period then [Re.char '.'] else [];
]
))
)
let enclosed enclosed =
match enclosed with
......