Commit e24b2dc1 authored by Eugenio Cano-Manuel's avatar Eugenio Cano-Manuel

Imported Upstream version 0.1.0

parents
/target
/lib
/classes
/checkouts
/doc
pom.xml
*.jar
*.class
.lein-deps-sum
.lein-failures
.lein-plugins
.DS_Store
# Scout
Scout is a library for scanning through strings in a functional
way. There are many great parser libraries in Clojure, but I wrote
Scout to process text without having to fully parse it, either because
the text format made that difficult, or it would have been overkill
for that particular task. For example, it is used for parsing the
mustache templates in the
[Stencil](http://github.com/davidsantiago/stencil) library.
The key object in Scout is a Scanner object, which is an immutable
object associated with the string it is scanning and a position within
that string. There are two ways to make a Scanner: either create a new
one using the `scanner` function, or use a Scout function to create a
new Scanner as the result of a search from a previous Scanner. If a
Scanner is created as the result of a successful search, it will
contain a third piece of data, an object with information about the
parts of the string it matched and any regular expression groups that
were assigned as part of the match. Since Scanners are immutable, you
can easily start any number of searches from a given Scanner and refer
back to Scanner objects from earlier in the parsing process.
## Usage
[API Reference](http://davidsantiago.github.com/scout)
Suppose we wish to search a string for an emoticon. One way to do this would be:
```clojure
user=> (require '[scout.core :as scout])
nil
user=> (-> (scout/scanner "Hi there. :)")
(scout/scan-until #":-?([()PD])"))
#scout.core.Scanner{:src "Hi there. :)", :curr-loc 12, :match #scout.core.MatchInfo{:start 10, :end 12, :groups [":)" ")"]}}
```
Here we used the `scan-until` function to search the string for the
next occurrence of the regular expression we gave it. It found a match
at the end of the string, and returns a new scanner at position 12,
which is just past the end of the string (so `scout.core/end?` will
return true on the returned Scanner). Since there was a successful
match, there is an associated MatchInfo object telling us that the
match started at character 10, and ended at character 12, with the
groups the regular expression matched. The first group is always the
entire matching string, so that is ":)", and any following match
groups will be the ordered matches of the groups in the regular
expression (from left to right). In this case, the group we specified
to catch the mouth of the emoticon matched ")". Now we can grab that
information and figure out if the emoticon is happy:
```clojure
user=> (-> (scout/scanner "Hi there. :-)")
(scout/scan-until #":-?([()PD])")
(scout/groups)
(nth 1)
{")" :happy, "(" :sad, "P" :sad, "D" :happy})
:happy
```
There are many more functions in Scout to help you parse, including
functions to scan for a match only at a given position, look ahead,
find pieces of the string before and after a match, skip to the
beginning of a match, and more. Check the
[API Reference](http://davidsantiago.github.com/scout) for the
details.
## Obtaining
Add
[scout "0.1.0"]
to the `:dependencies` key of your Leiningen project map.
## License
Copyright © 2012 David Santiago
Distributed under the Eclipse Public License, the same as Clojure.
;; Leiningen project definition for Scout.
(defproject scout "0.1.0"
  :description "Functional string searching and matching for parsing."
  ;; Project home page. This replaces the "http://example.com/FIXME"
  ;; placeholder left over from the Leiningen project template; it is the
  ;; same repository the :codox :src-dir-uri below points into.
  :url "http://github.com/davidsantiago/scout"
  :license {:name "Eclipse Public License"
            :url "http://www.eclipse.org/legal/epl-v10.html"}
  :dependencies [[org.clojure/clojure "1.3.0"]]
  ;; codox generates the API reference documentation.
  :plugins [[codox "0.6.2-SNAPSHOT"]]
  ;; Make generated docs link back to source lines in the repository.
  :codox {:src-dir-uri "http://github.com/davidsantiago/scout/blob/master/"
          :src-linenum-anchor-prefix "L"})
(ns scout.core
(:refer-clojure :exclude [peek])
(:import java.util.regex.Matcher))
;; Record describing a single match against a scanner's source string.
;; Instances are normally built through the `match-info` function.
(defrecord MatchInfo [start ;; Index in the source string where the match starts.
end ;; Index in the source string where the match ends (Matcher.end when built from a Matcher).
groups]) ;; Vector of matched groups; when built from a Matcher, element 0 is the whole match.
;; Record holding the scanning state: the source string, a position within
;; it, and information about the most recent match (nil when there is none).
(defrecord Scanner [src ;; String being scanned.
curr-loc ;; Current location in src (an integer index).
^MatchInfo match]) ;; MatchInfo for the last match, or nil.
(defn scanner
  "Build a Scanner over source-string. With only the string, scanning starts
   at index 0 with no match info. An optional second argument gives the
   starting index, and an optional third argument is a MatchInfo to install
   as the scanner's most recent match."
  ([source-string]
     (scanner source-string 0))
  ([source-string pos]
     (scanner source-string pos nil))
  ([source-string pos match]
     (new Scanner source-string pos match)))
(defn ^{:private true} re-groups-vec
  "Like Clojure's re-groups, but always returns a vector of strings, even
   when the pattern has no capturing groups (re-groups returns a bare string
   in that case). Element 0 is the whole match; elements 1..n are the
   capturing groups in order. A group that did not participate in the match
   is nil."
  [^Matcher m]
  (let [last-group (.groupCount m)]
    ;; groupCount excludes group 0, so the range is inclusive of last-group.
    (vec (for [idx (range (inc last-group))]
           (.group m (int idx))))))
(defn match-info
  "Build a MatchInfo. Given a single Matcher, the start, end and groups are
   read from the matcher's current match. Given start and end indices, the
   groups are nil unless a third argument supplies them."
  ([^Matcher matcher]
     (let [match-start (.start matcher)
           match-end (.end matcher)
           match-groups (re-groups-vec matcher)]
       (match-info match-start match-end match-groups)))
  ([start end]
     (match-info start end nil))
  ([start end groups]
     (new MatchInfo start end groups)))
;;
;; Positional information. These functions act on a Scanner and return
;; the requested values.
;;
(defn position
  "The scanner's current location in its source string, as an integer index."
  [^Scanner scanner]
  (let [{:keys [curr-loc]} scanner]
    curr-loc))
(defn beginning-of-line?
  "True when the scanner sits at the start of a line: either at index 0, or
   immediately after a newline character."
  [^Scanner scanner]
  (let [{:keys [src curr-loc]} scanner]
    (if (= 0 curr-loc)
      true
      ;; Look at the character just before the current location.
      (= \newline (get src (dec curr-loc))))))
(defn end?
  "True when the scanner's current location is at (or beyond) the end of the
   input string."
  [^Scanner scanner]
  (let [{:keys [src curr-loc]} scanner
        length (count src)]
    (<= length curr-loc)))
(defn remainder
  "The portion of the source string from the scan pointer onward. A scan
   pointer past the end of the string yields the empty string."
  [^Scanner scanner]
  (let [text (:src scanner)
        ;; Clamp so a location beyond the end does not make subs throw.
        from (min (:curr-loc scanner) (count text))]
    (subs text from)))
(defn groups
  "The groups recorded for the scanner's last match, or nil when there is no
   match. The first group is the complete match."
  [^Scanner scanner]
  (-> scanner :match :groups))
(defn matched
  "The string matched by the last scan (the first of the match groups), or
   nil when there is no match."
  [^Scanner scanner]
  (let [[whole-match] (groups scanner)]
    whole-match))
(defn pre-match
  "The 'pre-match' of the last scan: everything in the input before the
   start of the match. Returns nil when the scanner has no match."
  [^Scanner scanner]
  (when-let [{:keys [start]} (:match scanner)]
    (subs (:src scanner) 0 start)))
(defn post-match
  "The 'post-match' of the last scan: everything in the input after the end
   of the match. Returns nil when the scanner has no match."
  [^Scanner scanner]
  (when-let [{:keys [end]} (:match scanner)]
    (subs (:src scanner) end)))
;;
;; Scanning/Advancing. These functions advance the scan pointer, returning a
;; Scanner object with the new configuration.
;;
(defn scan
  "Match pattern starting exactly at the current location. On match, returns
   a new Scanner advanced past the matched text and carrying the match info.
   Otherwise, returns the same scanner minus any previous match data.

   A scanner whose location is past the end of the string is treated as being
   at the end of the string (as `remainder` does), rather than throwing."
  [^Scanner s pattern]
  (let [src (:src s)
        src-length (count src)
        ;; Matcher.region throws IndexOutOfBoundsException if the start index
        ;; exceeds the end index, so clamp a past-the-end location.
        start (min (position s) src-length)
        ;; Restrict the matcher's window so it begins at the current location;
        ;; lookingAt then only matches at the start of that region.
        matcher (.region (re-matcher pattern src) start src-length)]
    (if (.lookingAt matcher)
      (let [mi (match-info matcher)
            matched-string (first (:groups mi))]
        (scanner src
                 (+ start (count matched-string))
                 mi))
      ;; No match: keep the position, drop stale match data.
      (assoc s :match nil))))
(defn scan-until
  "Match pattern at any point at or after the current location. On match,
   returns a new Scanner positioned at the end of the match, with just the
   matching part recorded in the match info. Otherwise, returns the same
   scanner minus any previous match data.

   A scanner whose location is past the end of the string is treated as being
   at the end of the string (as `remainder` does), rather than throwing."
  [^Scanner s pattern]
  (let [src (:src s)
        src-length (count src)
        ;; Matcher.region throws IndexOutOfBoundsException if the start index
        ;; exceeds the end index, so clamp a past-the-end location.
        start (min (position s) src-length)
        ;; Only search the part of the string from the current location on.
        matcher (.region (re-matcher pattern src) start src-length)]
    (if (.find matcher)
      (let [mi (match-info matcher)]
        (scanner src
                 (:end mi)
                 mi))
      ;; Remove the match data from the input scanner, since we failed to match.
      (assoc s :match nil))))
(defn skip-to-match-start
  "Search for pattern at any point after the current location. On match, the
   returned Scanner is positioned at the beginning of the match (so a
   following scan with the same pattern succeeds) and carries the match info.
   On failure, the returned Scanner keeps the current position and has no
   match data."
  [^Scanner s pattern]
  (let [found (scan-until s pattern)
        ;; When scan-until failed there is no matched string, so this is 0
        ;; and the position below is left where it was.
        hit-length (count (matched found))
        new-position (- (position found) hit-length)]
    (scanner (:src s) new-position (:match found))))
;;
;; Looking ahead. These functions tell you about what is further ahead in
;; the string. Return the answers instead of a new Scanner.
;;
(defn check
  "Look ahead: the string that scan would match at the current location,
   or nil if it would not match. The scanner itself is not advanced."
  [^Scanner s pattern]
  (-> s
      (scan pattern)
      matched))
(defn check-until
  "Look ahead: the string that scan-until would match, or nil if it would
   not match. The scanner itself is not advanced."
  [^Scanner s pattern]
  (-> s
      (scan-until pattern)
      matched))
(defn check-until-inclusive
  "Look ahead: the text from the scanner's current position through the end
   of whatever scan-until would match. When scan-until does not match, its
   position is unchanged and the result is the empty string."
  [^Scanner s pattern]
  (let [from (position s)
        to (position (scan-until s pattern))]
    (subs (:src s) from to)))
(defn peek
  "Returns up to the next n characters after the current location; fewer if
   the input runs out first."
  [^Scanner s n]
  (let [rest-of-input (remainder s)
        ;; Never ask subs for more characters than are left.
        length (min n (count rest-of-input))]
    (subs rest-of-input 0 length)))
(ns scout.test.core
(:use clojure.test)
(:require [scout.core :as scout]))
;;
;; Information access tests.
;;
;; position reports the scanner's current index.
(deftest test-position
  (are [expected s] (= expected (scout/position s))
       0 (scout/scanner "")
       0 (scout/scanner "test")
       1 (scout.core.Scanner. "test" 1 nil)))
;; Index 0 and the index right after a newline count as line beginnings;
;; the index right after a carriage return does not.
(deftest test-beginning-of-line?
  (are [expected s] (= expected (scout/beginning-of-line? s))
       true (scout/scanner "")
       true (scout/scanner "test")
       false (scout.core.Scanner. "test\r\ntest" 5 nil)
       true (scout.core.Scanner. "test\r\ntest" 6 nil)))
;; An empty string is already at its end; a fresh scanner over text is not.
(deftest test-end?
  (are [expected s] (= expected (scout/end? s))
       true (scout/scanner "")
       false (scout/scanner "test")))
;; remainder returns the unscanned tail, and "" at or past the end.
(deftest test-remainder
  (are [expected s] (= expected (scout/remainder s))
       "test" (scout/scanner "test")
       "" (scout/scanner "")
       "" (scout.core.Scanner. "test" 4 nil)
       "" (scout.core.Scanner. "test" 5 nil)))
;; groups returns the group vector stored in the scanner's match info.
(deftest test-groups
  (is (= ["m"]
         (-> (scout/scanner "test" 0 (scout/match-info 0 1 ["m"]))
             scout/groups))))
;; matched returns the first group of the scanner's match info.
(deftest test-matched
  (is (= "m"
         (-> (scout/scanner "test" 0 (scout/match-info 0 1 ["m"]))
             scout/matched))))
(deftest test-pre-match
  (testing "with hand-built match info"
    (is (= "beginn"
           (-> (scout/scanner "beginning" 9 (scout/match-info 6 8 ["in"]))
               scout/pre-match))))
  (testing "after two consecutive scans"
    (is (= "test"
           (-> (scout/scanner "test string")
               (scout/scan #"test")
               (scout/scan #"\s+")
               scout/pre-match)))))
(deftest test-post-match
  (testing "with hand-built match info"
    (is (= "ning"
           (-> (scout/scanner "beginning" 5 (scout/match-info 3 5 ["in"]))
               scout/post-match))))
  (testing "after two consecutive scans"
    (is (= "string"
           (-> (scout/scanner "test string")
               (scout/scan #"test")
               (scout/scan #"\s+")
               scout/post-match)))))
;;
;; Scanning/Advancing tests.
;;
(deftest test-scan
  (testing "a single scan from the start of the string"
    (are [expected source pattern extract]
         (= expected (-> (scout/scanner source)
                         (scout/scan pattern)
                         extract))
         "t" "test" #"t" scout/matched
         1 "test" #"t" scout/position
         "test" "test" #"test" scout/matched)
    (is (scout/end? (scout/scan (scout/scanner "test") #"test")))
    (is (= ["t"]
           (-> (scout/scanner "test-string")
               (scout/scan #"t")
               scout/groups))))
  ;; Compounded scans should work.
  (testing "compounded scans"
    (are [expected extract]
         (= expected (-> (scout/scanner "test string")
                         (scout/scan #"test")
                         (scout/scan #"\s+")
                         extract))
         5 scout/position
         4 (get-in [:match :start])
         5 (get-in [:match :end])))
  ;; Failing to match should leave us in the same position.
  (testing "a failed scan keeps the position"
    (is (= 0 (-> (scout/scanner "testgoal")
                 (scout/scan #"notinthestring")
                 scout/position))))
  ;; Failing to match should remove pre-existing match data.
  (testing "a failed scan clears earlier match data"
    (is (= nil (-> (scout/scanner "test string")
                   (scout/scan #"test")
                   (scout/scan #"notinthestring")
                   (get :match))))))
(deftest test-scan-until
  (testing "a single scan-until from the start of the string"
    (are [expected source pattern extract]
         (= expected (-> (scout/scanner source)
                         (scout/scan-until pattern)
                         extract))
         "goal" "testgoal" #"goal" scout/matched
         8 "testgoal" #"goal" scout/position
         "goal" "goal" #"goal" scout/matched)
    (are [source] (scout/end? (scout/scan-until (scout/scanner source) #"goal"))
         "goal"
         "testgoal")
    (is (= ["s"] (-> (scout/scanner "test string")
                     (scout/scan-until #"s")
                     scout/groups))))
  ;; Compounded scan-untils should work.
  (testing "compounded scan-untils"
    (are [expected source extract]
         (= expected (-> (scout/scanner source)
                         (scout/scan-until #"s")
                         (scout/scan-until #"r")
                         extract))
         8 "test string" scout/position
         7 "test string" (get-in [:match :start])
         8 "test-string" (get-in [:match :end])))
  ;; Failing to match should leave us in the same position.
  (testing "a failed scan-until keeps the position"
    (is (= 0 (-> (scout/scanner "testgoal")
                 (scout/scan-until #"notinthestring")
                 scout/position))))
  ;; Failing to match should remove pre-existing match data.
  (testing "a failed scan-until clears earlier match data"
    (is (= nil (-> (scout/scanner "test string")
                   (scout/scan #"test")
                   (scout/scan-until #"notinthestring")
                   (get :match))))))
(deftest test-skip-to-match-start
  (testing "skipping to the start of a match"
    (are [expected source extract]
         (= expected (-> (scout/scanner source)
                         (scout/skip-to-match-start #"goal")
                         extract))
         "goal" "testgoal" scout/matched
         4 "testgoal" scout/position
         "goal" "goal" scout/matched))
  ;; Calling scan on result of skip-to-match-start should work.
  (testing "scan succeeds right after skip-to-match-start"
    (is (= "goal"
           (-> (scout/scanner "testgoal")
               (scout/skip-to-match-start #"goal")
               (scout/scan #"goal")
               scout/matched)))
    (is (-> (scout/scanner "testgoal")
            (scout/skip-to-match-start #"goal")
            (scout/scan #"goal")
            scout/end?)))
  ;; Failing to match should leave us in the same position.
  (testing "a failed skip keeps the position"
    (is (= 0 (-> (scout/scanner "testgoal")
                 (scout/skip-to-match-start #"yes")
                 scout/position))))
  ;; Failing to match should remove pre-existing match data.
  (testing "a failed skip clears earlier match data"
    (is (= nil (-> (scout/scanner "test string")
                   (scout/scan #"test")
                   (scout/skip-to-match-start #"notinthestring")
                   (get :match))))))
;;
;; Look-ahead tests.
;;
;; check reports what scan would match at the current location.
(deftest test-check
  (are [expected pattern] (= expected (scout/check (scout/scanner "test") pattern))
       "t" #"t"
       "test" #"test"))
;; check-until reports what scan-until would match.
(deftest test-check-until
  (are [expected source] (= expected (scout/check-until (scout/scanner source) #"goal"))
       "goal" "testgoal"
       "goal" "goal"))
;; check-until-inclusive includes the text leading up to the match as well.
(deftest test-check-until-inclusive
  (are [expected source]
       (= expected (scout/check-until-inclusive (scout/scanner source) #"goal"))
       "testgoal" "testgoal"
       "goal" "goal"))
;; peek returns the next n characters, truncated at the end of the input.
(deftest test-peek
  (are [expected n] (= expected (scout/peek (scout/scanner "test") n))
       "t" 1
       "test" 4
       "test" 500))
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment