Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • janitor-team/proposed/ocaml-re
  • ocaml-team/ocaml-re
2 results
Show changes
Commits on Source (4)
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: weekly
name: github pages
name: Deploy odoc to GitHub Pages
on:
push:
branches:
- master
permissions: read-all
concurrency:
group: deploy-odoc
cancel-in-progress: true
jobs:
deploy:
name: Deploy doc
deploy-odoc:
name: Deploy odoc to GitHub Pages
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
permissions:
contents: read
id-token: write
pages: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@main
- name: Checkout tree
uses: actions/checkout@v4
- name: Use OCaml
- name: Set-up OCaml
uses: ocaml/setup-ocaml@v2
with:
ocaml-compiler: '4.14.x'
- name: Pin
run: opam pin -n .
- name: Depext
run: opam depext -yt re
ocaml-compiler: "5.2"
- name: Deps
run: opam install -d . --deps-only
run: opam install odoc
- name: Install dependencies
run: opam install . --deps-only --with-doc
- name: Build
- name: Build documentation
run: opam exec -- dune build @doc
- name: Deploy
uses: peaceiris/actions-gh-pages@v3
- name: Set-up Pages
uses: actions/configure-pages@v5
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./_build/default/_doc/_html/
destination_dir: .
enable_jekyll: true
path: _build/default/_doc/_html
- name: Deploy odoc to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4
name: build
on:
push:
branches:
- master
pull_request:
branches:
- master
- push
- pull_request
jobs:
run:
name: Build
strategy:
matrix:
os:
- macos-latest
- ubuntu-latest
#- macos-latest
#- windows-latest
- windows-latest
ocaml-compiler:
- 4.08.x
- 4.14.x
- "4.14"
- "5.2"
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- uses: ocaml/setup-ocaml@v2
with:
ocaml-compiler: ${{ matrix.ocaml-compiler }}
- run: opam pin -n .
- run: opam depext -yt re
- run: opam install -t . --deps-only
- run: opam install -y core_bench core_unix
- run: opam exec -- dune build
- run: opam exec -- dune runtest
- uses: actions/checkout@v4
- uses: ocaml/setup-ocaml@v3
with:
ocaml-compiler: ${{ matrix.ocaml-compiler }}
- run: opam install . --deps-only --with-test
- run: opam install core_bench core_unix
- run: opam exec -- dune build
- run: opam exec -- dune runtest
name: Nix
on:
- push
- pull_request
jobs:
tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: cachix/install-nix-action@v27
with:
nix_path: nixpkgs=channel:nixos-unstable
- run: nix develop -c dune runtest
version=0.26.1
profile=janestreet
ocaml-version=4.08.0
{
"ocaml.sandbox": {
"kind": "opam",
"switch": "4.14.1"
}
}
Unreleased
----------
* Add `Re.split_delim` (#233)
* Fix handling of empty matches in splitting and substitution functions (#233)
* Add support for character classes in `Re.Posix` (#263)
1.11.0 (19-Aug-2023)
--------------------
......
......@@ -23,7 +23,3 @@ release: ## Release on Opam
dune-release publish distrib --verbose
dune-release opam pkg
dune-release opam submit
.PHONY: nix
nix:
nix-shell -A resolve default.nix
open Core
open Core_bench
module List = ListLabels
module String = StringLabels
module Http = struct
open Re
let space = rep blank
let crlf = str "\r\n"
let token =
rep1 @@ compl [
rg '\000' '\031' ;
set "\127)(<>@,;:\\/[]?={}"
]
let token = rep1 @@ compl [ rg '\000' '\031'; set "\127)(<>@,;:\\/[]?={}" ]
let meth = token
let version =
let digits = rep1 digit in
let decimal = seq [digits ; opt (seq [char '.' ; digits])] in
seq [str "HTTP/" ; decimal]
let decimal = seq [ digits; opt (seq [ char '.'; digits ]) ] in
seq [ str "HTTP/"; decimal ]
;;
let uri = rep1 (compl [char '\n'])
let request_line =
[ space
; group meth
; space
; group uri
; group version
; space]
|> seq
let uri = rep1 (compl [ char '\n' ])
let request_line = [ space; group meth; space; group uri; group version; space ] |> seq
let header =
let key = group (rep1 (Re.compl [char ':'])) in
let value = group (rep1 (Re.compl [char '\n'])) in
seq [space ; key ; space ; char ':' ; space ; value ; space ; crlf]
let key = group (rep1 (Re.compl [ char ':' ])) in
let value = group (rep1 (Re.compl [ char '\n' ])) in
seq [ space; key; space; char ':'; space; value; space; crlf ]
;;
let request' = seq [request_line ; crlf ; rep header ; crlf ]
let request' = seq [ request_line; crlf; rep header; crlf ]
module Export = struct
let request = request'
let request_g = request' |> no_group
let requests = request' |> rep1
let requests_g = request' |> no_group |> rep1
end
end
let http_requests = In_channel.read_all "benchmarks/http-requests.txt"
let http_requests = Stdio.In_channel.read_all "benchmarks/http-requests.txt"
let str_20_zeroes = String.make 20 '0'
let re_20_zeroes = Re.(str str_20_zeroes)
let tex_ignore_re =
"benchmarks/tex.gitignore"
|> In_channel.read_lines
Stdio.In_channel.read_lines "benchmarks/tex.gitignore"
|> List.map ~f:(fun s ->
match String.lsplit2 s ~on:'#' with
| Some (pattern, _comment) -> pattern
| None -> s)
match Base.String.lsplit2 s ~on:'#' with
| Some (pattern, _comment) -> pattern
| None -> s)
|> List.filter_map ~f:(fun s ->
match String.strip s with
| "" -> None
| s -> Some s)
match Base.String.strip s with
| "" -> None
| s -> Some s)
|> List.map ~f:Re.Glob.glob
|> Re.alt
;;
let tex_ignore_filesnames = In_channel.read_lines "benchmarks/files"
let tex_ignore_filesnames = Stdio.In_channel.read_lines "benchmarks/files"
let lots_of_a's =
String.init 101 ~f:(function
| 100 -> 'b'
| _ -> 'a')
| 100 -> 'b'
| _ -> 'a')
;;
let lots_o_a's_re =
Re.(seq [char 'a' ; opt (char 'a') ; char 'b'])
let lots_o_a's_re = Re.(seq [ char 'a'; opt (char 'a'); char 'b' ])
let media_type_re =
let re = Re.Emacs.re ~case:true "[ \t]*\\([^ \t;]+\\)" in
Re.(seq ([start; re]))
Re.(seq [ start; re ])
;;
(* Taken from https://github.com/rgrinberg/ocaml-uri/blob/903ef1010f9808d6f3f6d9c1fe4b4eabbd76082d/lib/uri.ml*)
let uri_reference =
Re.Posix.re "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
;;
let uris =
[ "https://google.com"
; "http://yahoo.com/xxx/yyy?query=param&one=two"
; "file:/random_crap" ]
; "file:/random_crap"
]
;;
let benchmarks =
[ "20 zeroes", re_20_zeroes, [str_20_zeroes]
; "lots of a's", lots_o_a's_re, [lots_of_a's]
; "media type match", media_type_re, [" foo/bar ; charset=UTF-8"]
; "uri", uri_reference, uris ]
[ "20 zeroes", re_20_zeroes, [ str_20_zeroes ]
; "lots of a's", lots_o_a's_re, [ lots_of_a's ]
; "media type match", media_type_re, [ " foo/bar ; charset=UTF-8" ]
; "uri", uri_reference, uris
]
;;
let exec_bench exec name (re : Re.t) cases =
let re = Re.compile re in
Bench.Test.create_group ~name (
List.mapi cases ~f:(fun i case ->
let name = sprintf "case %i" i in
Bench.Test.create ~name (fun () -> ignore (exec re case))
)
)
Bench.Test.create_group
~name
(List.mapi cases ~f:(fun i case ->
let name = Printf.sprintf "case %i" i in
Bench.Test.create ~name (fun () -> ignore (exec re case))))
;;
let exec_bench_many exec name re cases =
let re = Re.compile re in
Bench.Test.create ~name (fun () ->
cases |> List.iter ~f:(fun x -> ignore (exec re x))
)
Bench.Test.create ~name (fun () -> List.iter cases ~f:(fun x -> ignore (exec re x)))
;;
let rec read_all_http pos re reqs =
if pos >= String.length reqs
then ()
else
else (
let g = Re.exec ~pos re reqs in
let (_, pos) = Re.Group.offset g 0 in
read_all_http (pos + 1) re reqs
let _, pos = Re.Group.offset g 0 in
read_all_http (pos + 1) re reqs)
;;
let rec drain_gen gen =
match gen () with
| Seq.Nil -> ()
| Cons (_, tail) -> drain_gen tail
;;
let string_traversal =
let open Bench in
let len = 1000 * 1000 in
let s = String.make len 'a' in
let re = Re.Pcre.regexp "aaaaaaaaaaaaaaaaz" in
Test.create ~name:"string traversal from #210" (fun () ->
try ignore (Re.execp re s ~pos:0) with
| Not_found -> ())
;;
let compile_clean_star =
let c = 'c' in
let s = String.make 10_000 c in
Bench.Test.create ~name:"kleene star compliation" (fun () ->
let re = Re.compile (Re.rep (Re.char 'c')) in
ignore (Re.execp re s))
;;
let benchmarks =
let benches =
benchmarks
|> List.map ~f:(fun (name, re, cases) ->
Bench.Test.create_group ~name
[ exec_bench Re.exec "exec" re cases
; exec_bench Re.execp "execp" re cases
; exec_bench Re.exec_opt "exec_opt" re cases ]
) in
Bench.Test.create_group
~name
[ exec_bench Re.exec "exec" re cases
; exec_bench Re.execp "execp" re cases
; exec_bench Re.exec_opt "exec_opt" re cases
])
in
let http_benches =
let open Bench in
let open Http.Export in
let manual =
[ request, "no group" ; request_g, "group" ]
[ request, "no group"; request_g, "group" ]
|> List.map ~f:(fun (re, name) ->
let re = Re.compile re in
Test.create ~name (fun () -> read_all_http 0 re http_requests)
)
|> Test.create_group ~name:"manual" in
let re = Re.compile re in
Test.create ~name (fun () -> read_all_http 0 re http_requests))
|> Test.create_group ~name:"manual"
in
let many =
let requests = Re.compile requests in
let requests_g = Re.compile requests_g in
[ Test.create ~name:"execp no group" (fun () ->
ignore (Re.execp requests http_requests)
)
ignore (Re.execp requests http_requests))
; Test.create ~name:"all_gen group" (fun () ->
http_requests
|> Re.Seq.all requests_g
|> drain_gen
)
] |> Test.create_group ~name:"auto" in
Test.create_group ~name:"http" [manual ; many] in
benches @ [
[ exec_bench_many Re.execp "execp"
; exec_bench_many Re.exec_opt "exec_opt" ]
|> List.map ~f:(fun f ->
f tex_ignore_re tex_ignore_filesnames)
|> Bench.Test.create_group ~name:"tex gitignore"
] @ [http_benches]
http_requests |> Re.Seq.all requests_g |> drain_gen)
]
|> Test.create_group ~name:"auto"
in
Test.create_group ~name:"http" [ manual; many ]
in
benches
@ [ [ exec_bench_many Re.execp "execp"; exec_bench_many Re.exec_opt "exec_opt" ]
|> List.map ~f:(fun f -> f tex_ignore_re tex_ignore_filesnames)
|> Bench.Test.create_group ~name:"tex gitignore"
]
@ [ http_benches ]
@ [ string_traversal ]
@ [ compile_clean_star ]
;;
let () = Command_unix.run (Bench.make_command benchmarks)
(executable
(libraries re threads core_bench core_unix.command_unix)
(name benchmark))
(executables
(libraries re core base stdio threads core_bench core_unix.command_unix)
(names benchmark memory))
(* This set of benchmarks is designed for testing re's memory usage rather than
speed. *)
module Bench = Core_bench.Bench
let size = 1_000
(* a pathological re that will consume a bunch of memory *)
let re =
let open Re in
compile @@ seq [ rep (set "01"); char '1'; repn (set "01") size (Some size) ]
;;
(* Another pathological case that is a simplified version of the above *)
let re2 =
let open Re in
seq [ rep (set "01"); char '1'; repn (set "01") size (Some size); char 'x' ] |> compile
;;
let str = "01" ^ String.make size '1'
let benchmarks =
[ "re", re; "re2", re2 ]
|> ListLabels.map ~f:(fun (name, re) ->
Bench.Test.create_indexed ~name ~args:[ 10; 20; 40; 80; 100; size ] (fun len ->
Base.Staged.stage (fun () ->
let len = Base.Int.min (String.length str) len in
ignore (Re.execp ~pos:0 ~len re str))))
;;
let () = Command_unix.run (Bench.make_command benchmarks)
ocaml-re (1.12.0-1) unstable; urgency=medium
* Team upload
* New upstream release
-- Stéphane Glondu <glondu@debian.org> Tue, 10 Sep 2024 08:08:29 +0200
ocaml-re (1.11.0-1) unstable; urgency=medium
* Team upload
......
# standalone derivation, for nix-build, nix-shell, etc
{ pkgs ? import <nixpkgs> {}, opam2nix ? import ./nix/opam2nix.nix }:
pkgs.callPackage ./nix { inherit opam2nix; }
(env
(_ (flags (:standard -w -50))))
\ No newline at end of file
(_
(flags
(:standard -w -50))))
(lang dune 2.0)
(name re)
(version 1.11.0)
(version 1.11.0-132-gf096726)
(implicit_transitive_deps false)
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nix-filter": {
"locked": {
"lastModified": 1710156097,
"narHash": "sha256-1Wvk8UP7PXdf8bCCaEoMnOT1qe5/Duqgj+rL8sRQsSM=",
"owner": "numtide",
"repo": "nix-filter",
"rev": "3342559a24e85fc164b295c3444e8a139924675b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "nix-filter",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1720283259,
"narHash": "sha256-AanFUMZl9eIjZUpph1q+4qixdGcBtb8gc2tRPOVviWA=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "c0acb02a188cb7e2406c50e6527b275a513d4d1b",
"type": "github"
},
"original": {
"owner": "nixos",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nix-filter": "nix-filter",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}
{
description = "ocaml-re flake";
inputs.nix-filter.url = "github:numtide/nix-filter";
inputs.flake-utils.url = "github:numtide/flake-utils";
inputs.nixpkgs.url = "github:nixos/nixpkgs";
outputs = { self, nixpkgs, flake-utils, nix-filter }:
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = nixpkgs.legacyPackages."${system}";
inherit (pkgs.ocamlPackages) buildDunePackage;
in
rec {
packages = rec {
default = re;
re = buildDunePackage {
pname = "re";
version = "n/a";
src = ./.;
duneVersion = "3";
propagatedBuildInputs = with pkgs.ocamlPackages; [ seq ];
checkInputs = with pkgs.ocamlPackages; [ ounit ];
doCheck = true;
};
};
devShells.default = pkgs.mkShell {
inputsFrom = pkgs.lib.attrValues packages;
buildInputs = with pkgs.ocamlPackages; [ ocaml-lsp core_bench pkgs.ocamlformat_0_26_1 ];
};
});
}
open Import
type ('a, _) ast =
| Alternative : 'a list -> ('a, [> `Uncased ]) ast
| No_case : 'a -> ('a, [> `Cased ]) ast
| Case : 'a -> ('a, [> `Cased ]) ast
let empty_alternative : ('a, 'b) ast = Alternative []
let equal_ast (type a) eq (x : (a, [ `Uncased ]) ast) (y : (a, [ `Uncased ]) ast) =
match x, y with
| Alternative a, Alternative b -> List.equal ~eq a b
;;
let pp_ast (type a b) f fmt (ast : (a, b) ast) =
let open Fmt in
let var s re = sexp fmt s f re in
match ast with
| Alternative alt -> sexp fmt "Alternative" (list f) alt
| Case c -> var "Case" c
| No_case c -> var "No_case" c
;;
type cset =
| Cset of Cset.t
| Intersection of cset list
| Complement of cset list
| Difference of cset * cset
| Cast of (cset, [ `Cased | `Uncased ]) ast
type ('a, 'case) gen =
| Set of 'a
| Ast of (('a, 'case) gen, 'case) ast
| Sequence of ('a, 'case) gen list
| Repeat of ('a, 'case) gen * int * int option
| Beg_of_line
| End_of_line
| Beg_of_word
| End_of_word
| Not_bound
| Beg_of_str
| End_of_str
| Last_end_of_line
| Start
| Stop
| Group of string option * ('a, 'case) gen
| No_group of ('a, 'case) gen
| Nest of ('a, 'case) gen
| Pmark of Pmark.t * ('a, 'case) gen
| Sem of Automata.Sem.t * ('a, 'case) gen
| Sem_greedy of Automata.Rep_kind.t * ('a, 'case) gen
let rec pp_gen pp_cset fmt t =
let open Format in
let open Fmt in
let pp = pp_gen pp_cset in
let var s re = sexp fmt s pp re in
let seq s rel = sexp fmt s (list pp) rel in
match t with
| Set cset -> pp_cset fmt cset
| Sequence sq -> seq "Sequence" sq
| Repeat (re, start, stop) ->
let pp' fmt () = fprintf fmt "%a@ %d%a" pp re start optint stop in
sexp fmt "Repeat" pp' ()
| Beg_of_line -> str fmt "Beg_of_line"
| End_of_line -> str fmt "End_of_line"
| Beg_of_word -> str fmt "Beg_of_word"
| End_of_word -> str fmt "End_of_word"
| Not_bound -> str fmt "Not_bound"
| Beg_of_str -> str fmt "Beg_of_str"
| End_of_str -> str fmt "End_of_str"
| Last_end_of_line -> str fmt "Last_end_of_line"
| Start -> str fmt "Start"
| Stop -> str fmt "Stop"
| Group (None, c) -> var "Group" c
| Group (Some n, c) -> sexp fmt "Named_group" (pair str pp) (n, c)
| Nest c -> var "Nest" c
| Pmark (m, r) -> sexp fmt "Pmark" (pair Pmark.pp pp) (m, r)
| Ast a -> pp_ast pp fmt a
| Sem (sem, a) -> sexp fmt "Sem" (pair Automata.Sem.pp pp) (sem, a)
| Sem_greedy (k, re) -> sexp fmt "Sem_greedy" (pair Automata.Rep_kind.pp pp) (k, re)
| No_group c -> var "No_group" c
;;
let rec pp_cset fmt cset =
let open Fmt in
let seq s rel = sexp fmt s (list pp_cset) rel in
match cset with
| Cast s -> pp_ast pp_cset fmt s
| Cset s -> sexp fmt "Set" Cset.pp s
| Intersection c -> seq "Intersection" c
| Complement c -> seq "Complement" c
| Difference (a, b) -> sexp fmt "Difference" (pair pp_cset pp_cset) (a, b)
;;
let rec equal cset x1 x2 =
match x1, x2 with
| Set s1, Set s2 -> cset s1 s2
| Sequence l1, Sequence l2 -> List.equal ~eq:(equal cset) l1 l2
| Repeat (x1', i1, j1), Repeat (x2', i2, j2) ->
Int.equal i1 i2 && Option.equal Int.equal j1 j2 && equal cset x1' x2'
| Beg_of_line, Beg_of_line
| End_of_line, End_of_line
| Beg_of_word, Beg_of_word
| End_of_word, End_of_word
| Not_bound, Not_bound
| Beg_of_str, Beg_of_str
| End_of_str, End_of_str
| Last_end_of_line, Last_end_of_line
| Start, Start
| Stop, Stop -> true
| Group _, Group _ ->
(* Do not merge groups! *)
false
| Pmark (m1, r1), Pmark (m2, r2) -> Pmark.equal m1 m2 && equal cset r1 r2
| Nest x, Nest y -> equal cset x y
| Ast x, Ast y -> equal_ast (equal cset) x y
| Sem (sem, a), Sem (sem', a') -> Poly.equal sem sem' && equal cset a a'
| Sem_greedy (rep, a), Sem_greedy (rep', a') -> Poly.equal rep rep' && equal cset a a'
| _ -> false
;;
type t = (cset, [ `Cased | `Uncased ]) gen
type no_case = (Cset.t, [ `Uncased ]) gen
let pp = pp_gen pp_cset
let cset cset = Set (Cset cset)
let rec handle_case_cset ign_case = function
| Cset s -> if ign_case then Cset.case_insens s else s
| Cast (Alternative l) -> List.map ~f:(handle_case_cset ign_case) l |> Cset.union_all
| Complement l ->
List.map ~f:(handle_case_cset ign_case) l |> Cset.union_all |> Cset.diff Cset.cany
| Difference (r, r') ->
Cset.inter
(handle_case_cset ign_case r)
(Cset.diff Cset.cany (handle_case_cset ign_case r'))
| Intersection l -> List.map ~f:(handle_case_cset ign_case) l |> Cset.intersect_all
| Cast (No_case a) -> handle_case_cset true a
| Cast (Case a) -> handle_case_cset false a
;;
let rec handle_case ign_case : t -> (Cset.t, [ `Uncased ]) gen = function
| Set s -> Set (handle_case_cset ign_case s)
| Sequence l -> Sequence (List.map ~f:(handle_case ign_case) l)
| Ast (Alternative l) ->
let l = List.map ~f:(handle_case ign_case) l in
Ast (Alternative l)
| Repeat (r, i, j) -> Repeat (handle_case ign_case r, i, j)
| ( Beg_of_line
| End_of_line
| Beg_of_word
| End_of_word
| Not_bound
| Beg_of_str
| End_of_str
| Last_end_of_line
| Start
| Stop ) as r -> r
| Sem (k, r) -> Sem (k, handle_case ign_case r)
| Sem_greedy (k, r) -> Sem_greedy (k, handle_case ign_case r)
| Group (n, r) -> Group (n, handle_case ign_case r)
| No_group r -> No_group (handle_case ign_case r)
| Nest r -> Nest (handle_case ign_case r)
| Ast (Case r) -> handle_case false r
| Ast (No_case r) -> handle_case true r
| Pmark (i, r) -> Pmark (i, handle_case ign_case r)
;;
module Export = struct
type nonrec t = t
let pp = pp
let seq = function
| [ r ] -> r
| l -> Sequence l
;;
let str s =
let l = ref [] in
for i = String.length s - 1 downto 0 do
l := Set (Cset (Cset.csingle s.[i])) :: !l
done;
seq !l
;;
let as_set_elems elems =
match
List.map elems ~f:(function
| Set e -> e
| _ -> raise_notrace Exit)
with
| exception Exit -> None
| e -> Some e
;;
let empty : t = Ast empty_alternative
let alt (elems : t list) : t =
match elems with
| [] -> empty
| [ x ] -> x
| _ ->
(match as_set_elems elems with
| None -> Ast (Alternative elems)
| Some elems -> Set (Cast (Alternative elems)))
;;
let epsilon = seq []
let repn r i j =
if i < 0 then invalid_arg "Re.repn";
match j, i with
| Some j, _ when j < i -> invalid_arg "Re.repn"
| Some 0, 0 -> epsilon
| Some 1, 1 -> r
| _ -> Repeat (r, i, j)
;;
let rep r = repn r 0 None
let rep1 r = repn r 1 None
let opt r = repn r 0 (Some 1)
let bol = Beg_of_line
let eol = End_of_line
let bow = Beg_of_word
let eow = End_of_word
let word r = seq [ bow; r; eow ]
let not_boundary = Not_bound
let bos = Beg_of_str
let eos = End_of_str
let whole_string r = seq [ bos; r; eos ]
let leol = Last_end_of_line
let start = Start
let stop = Stop
type 'b f = { f : 'a. 'a -> ('a, 'b) ast }
let make_set f t =
match t with
| Set x -> Set (Cast (f.f x))
| _ -> Ast (f.f t)
;;
let preserve_set f t =
match t with
| Set _ -> t
| _ -> f t
;;
let longest = preserve_set (fun t -> Sem (`Longest, t))
let shortest = preserve_set (fun t -> Sem (`Shortest, t))
let first = preserve_set (fun t -> Sem (`First, t))
let greedy = preserve_set (fun t -> Sem_greedy (`Greedy, t))
let non_greedy = preserve_set (fun t -> Sem_greedy (`Non_greedy, t))
let group ?name r = Group (name, r)
let no_group = preserve_set (fun t -> No_group t)
let nest r = Nest r
let set str = cset (Cset.set str)
let mark r =
let i = Pmark.gen () in
i, Pmark (i, r)
;;
(**** Character sets ****)
let as_set_or_error name elems =
match as_set_elems elems with
| None -> invalid_arg name
| Some s -> s
;;
let inter elems = Set (Intersection (as_set_or_error "Re.inter" elems))
let compl elems = Set (Complement (as_set_or_error "Re.compl" elems))
let diff r r' =
match r, r' with
| Set r, Set r' -> Set (Difference (r, r'))
| _, _ -> invalid_arg "Re.diff"
;;
let case =
let f = { f = (fun r -> Case r) } in
fun t -> make_set f t
;;
let no_case =
let f = { f = (fun r -> No_case r) } in
fun t -> make_set f t
;;
let witness t =
let rec witness (t : no_case) =
match t with
| Set c -> String.make 1 (Cset.to_char (Cset.pick c))
| Sequence xs -> String.concat "" (List.map ~f:witness xs)
| Ast (Alternative (x :: _)) -> witness x
| Ast (Alternative []) -> assert false
| Repeat (r, from, _to) ->
let w = witness r in
let b = Buffer.create (String.length w * from) in
for _i = 1 to from do
Buffer.add_string b w
done;
Buffer.contents b
| No_group r -> witness r
| Sem_greedy (_, r) | Sem (_, r) | Nest r | Pmark (_, r) | Group (_, r) -> witness r
| Beg_of_line
| End_of_line
| Beg_of_word
| End_of_word
| Not_bound
| Beg_of_str
| Last_end_of_line
| Start
| Stop
| End_of_str -> ""
in
witness (handle_case false t)
;;
end
open Export
let rec merge_sequences = function
| [] -> []
| Ast (Alternative l') :: r -> merge_sequences (l' @ r)
| Sequence (x :: y) :: r ->
(match merge_sequences r with
| Sequence (x' :: y') :: r' when equal Cset.equal x x' ->
Sequence [ x; Ast (Alternative [ seq y; seq y' ]) ] :: r'
| r' -> Sequence (x :: y) :: r')
| x :: r -> x :: merge_sequences r
;;
(*XXX Use a better algorithm allowing non-contiguous regions? *)
let colorize color_map (regexp : no_case) =
let lnl = ref false in
let rec colorize regexp =
match (regexp : no_case) with
| Set s -> Color_map.split color_map s
| Sequence l -> List.iter ~f:colorize l
| Ast (Alternative l) -> List.iter ~f:colorize l
| Repeat (r, _, _) -> colorize r
| Beg_of_line | End_of_line -> Color_map.split color_map Cset.nl
| Beg_of_word | End_of_word | Not_bound -> Color_map.split color_map Cset.cword
| Beg_of_str | End_of_str | Start | Stop -> ()
| Last_end_of_line -> lnl := true
| No_group r | Group (_, r) | Nest r | Pmark (_, r) -> colorize r
| Sem (_, r) | Sem_greedy (_, r) -> colorize r
in
colorize regexp;
!lnl
;;
let rec anchored_ast : (t, _) ast -> bool = function
| Alternative als -> List.for_all ~f:anchored als
| No_case r | Case r -> anchored r
and anchored : t -> bool = function
| Ast a -> anchored_ast a
| Sequence l -> List.exists ~f:anchored l
| Repeat (r, i, _) -> i > 0 && anchored r
| No_group r | Sem (_, r) | Sem_greedy (_, r) | Group (_, r) | Nest r | Pmark (_, r) ->
anchored r
| Set _
| Beg_of_line
| End_of_line
| Beg_of_word
| End_of_word
| Not_bound
| End_of_str
| Last_end_of_line
| Stop -> false
| Beg_of_str | Start -> true
;;
let t_of_cset x = Set x
type ('a, _) ast = private
| Alternative : 'a list -> ('a, [> `Uncased ]) ast
| No_case : 'a -> ('a, [> `Cased ]) ast
| Case : 'a -> ('a, [> `Cased ]) ast
type cset = private
| Cset of Cset.t
| Intersection of cset list
| Complement of cset list
| Difference of cset * cset
| Cast of (cset, [ `Cased | `Uncased ]) ast
type ('a, 'case) gen = private
| Set of 'a
| Ast of (('a, 'case) gen, 'case) ast
| Sequence of ('a, 'case) gen list
| Repeat of ('a, 'case) gen * int * int option
| Beg_of_line
| End_of_line
| Beg_of_word
| End_of_word
| Not_bound
| Beg_of_str
| End_of_str
| Last_end_of_line
| Start
| Stop
| Group of string option * ('a, 'case) gen
| No_group of ('a, 'case) gen
| Nest of ('a, 'case) gen
| Pmark of Pmark.t * ('a, 'case) gen
| Sem of Automata.Sem.t * ('a, 'case) gen
| Sem_greedy of Automata.Rep_kind.t * ('a, 'case) gen
type t = (cset, [ `Cased | `Uncased ]) gen
type no_case = (Cset.t, [ `Uncased ]) gen
val pp : t Fmt.t
val merge_sequences : (Cset.t, [ `Uncased ]) gen list -> (Cset.t, [ `Uncased ]) gen list
val handle_case : bool -> t -> (Cset.t, [ `Uncased ]) gen
val anchored : t -> bool
val colorize : Color_map.t -> (Cset.t, [ `Uncased ]) gen -> bool
module Export : sig
type nonrec t = t
val empty : t
val epsilon : t
val str : string -> t
val no_case : t -> t
val case : t -> t
val diff : t -> t -> t
val compl : t list -> t
val repn : t -> int -> int option -> t
val inter : t list -> t
val set : string -> t
val mark : t -> Pmark.t * t
val nest : t -> t
val no_group : t -> t
val whole_string : t -> t
val leol : t
val longest : t -> t
val greedy : t -> t
val non_greedy : t -> t
val stop : t
val not_boundary : t
val group : ?name:string -> t -> t
val word : t -> t
val first : t -> t
val bos : t
val bow : t
val eow : t
val eos : t
val bol : t
val start : t
val eol : t
val opt : t -> t
val rep : t -> t
val rep1 : t -> t
val alt : t list -> t
val shortest : t -> t
val seq : t list -> t
val pp : t Fmt.t
val witness : t -> string
end
val cset : Cset.t -> t
val t_of_cset : cset -> t
This diff is collapsed.