Commit d811784f authored by Anthony Fok's avatar Anthony Fok

Update upstream source from tag 'upstream/0.1.0'

Update to upstream version '0.1.0'
with Debian dir cfb9cc074223bb40bbdd5bb2f600520711d8fe2e
parents 107fa129 6285414d
/*
Agg computes aggregate values over tabular text.
It behaves somewhat like the SQL “GROUP BY” clause.
Usage:
agg [function...]
It reads input from stdin as a sequence of records, one per line.
It treats each line as a set of fields separated by white space.
One field (the first, by default) is designated as the key.
Successive lines with equal keys are grouped into a group,
and agg produces one line of output for each group.
(Note that only contiguous input lines can form a group.
If you need to make sure that all records for a given key
are grouped together, sort the input first.)
For each remaining field,
agg applies a function to all the values in the group,
producing a single output value.
The command line arguments specify which functions to use,
one per field in the input table.
Functions
The available functions are:
key group by this field (default for field 1)
first value from first line of group (default for rest)
last value from last line of group
sample value from any line of group, uniformly at random
prefix longest common string prefix
join:sep concatenate strings with given sep
smin lexically least string
smax lexically greatest string
min numerically least value
max numerically greatest value
sum numeric sum
mean arithmetic mean
count number of records (ignores input value)
const:val print val, ignoring input
drop omit the column entirely
The numeric functions skip items that don't parse as numbers.
Examples
Using the following input:
$ cat >input
-rwx alice 100 /home/alice/bin/crdt
-rw- alice 210002 /home/alice/thesis.tex
-rw- bob 10051 /home/bob/expenses.tab
-rwx kr 862060 /home/kr/bin/blog
-rwx kr 304608 /home/kr/bin/agg
Disk usage for each user, plus where that disk usage occurs
(longest common prefix of filesystem paths):
$ agg <input drop key sum prefix
alice 210153 /home/alice/
bob 10051 /home/bob/expenses.tab
kr 1166668 /home/kr/
Disk usage for executable vs non-executable files:
$ sort input | agg key drop sum join:,
-rw- 220053 /home/alice/thesis.tex,/home/bob/expenses.tab
-rwx 1166768 /home/alice/bin/crdt,/home/kr/bin/agg,/home/kr/bin/blog
*/
package main
package main
// TODO(kr): tests
import (
"bufio"
"fmt"
"log"
"math/rand"
"os"
"strings"
"time"
)
type agg interface {
merge(string)
String() string
}
var (
key = 0
funcmap = make(map[int]func(init, arg string) agg)
argmap = make(map[int]string)
symtab = map[string]func(init, arg string) agg{
"first": first,
"last": last,
"prefix": prefix,
"sample": sample,
"join": join,
"smin": smin,
"smax": smax,
"min": min,
"max": max,
"sum": sum,
"mean": mean,
"count": count,
"const": constf,
"drop": nil,
}
)
func main() {
log.SetPrefix("agg: ")
log.SetFlags(0)
rand.Seed(time.Now().UnixNano())
for i, sym := range os.Args[1:] {
if p := strings.IndexByte(sym, ':'); p >= 0 {
sym, argmap[i] = sym[:p], sym[p+1:]
}
if sym == "key" {
key, sym = i, "first"
}
f, ok := symtab[sym]
if !ok {
log.Fatalf("bad function: %q", sym)
}
funcmap[i] = f
}
sc := bufio.NewScanner(os.Stdin)
var g *group
for sc.Scan() {
ss := strings.Fields(sc.Text())
if !matches(g, ss) {
emit(g)
g = &group{key: ss[key]}
}
mergeLine(g, ss)
}
emit(g)
}
type group struct {
key string
agg []agg
}
func matches(g *group, ss []string) bool {
return g != nil && g.key == ss[key]
}
func emit(g *group) {
if g == nil {
return
}
rest := false
for i, a := range g.agg {
if f, ok := funcmap[i]; ok && f == nil {
continue
}
if rest {
fmt.Print("\t")
}
rest = true
fmt.Print(a)
}
fmt.Println()
}
func mergeLine(g *group, ss []string) {
for i, s := range ss {
if i >= len(g.agg) {
f := funcmap[i]
if f == nil {
f = first
}
g.agg = append(g.agg, f(s, argmap[i]))
} else {
g.agg[i].merge(s)
}
}
}
package main
import (
"math/big"
"strconv"
)
func min(s, arg string) agg { return newBinop(s, opmin) }
func max(s, arg string) agg { return newBinop(s, opmax) }
func sum(s, arg string) agg { return newBinop(s, opsum) }
type binop struct {
v *big.Float
f func(a, b *big.Float) *big.Float
}
func newBinop(s string, f func(a, b *big.Float) *big.Float) *binop {
v, _ := parseFloat(s)
return &binop{v, f}
}
func (o *binop) String() string {
if o.v == nil {
return "NaN"
}
return o.v.Text('f', -1)
}
func (o *binop) merge(s string) {
v, ok := parseFloat(s)
if !ok {
return
}
o.v = o.f(o.v, v)
}
func opmin(a, b *big.Float) *big.Float {
if a != nil && (b == nil || a.Cmp(b) <= 0) {
return a
}
return b
}
func opmax(a, b *big.Float) *big.Float {
if a != nil && (b == nil || a.Cmp(b) >= 0) {
return a
}
return b
}
func opsum(a, b *big.Float) *big.Float {
if a == nil {
return b
} else if b == nil {
return a
}
return a.Add(a, b)
}
type meanagg struct {
v *big.Float
d float64 // actually an integer
}
func mean(s, arg string) agg {
v, ok := parseFloat(s)
if !ok {
return &meanagg{new(big.Float), 0}
}
return &meanagg{v, 1}
}
func (m *meanagg) String() string {
if m.d == 0 {
return "NaN"
}
v := new(big.Float).Quo(m.v, big.NewFloat(m.d))
return v.Text('f', -1)
}
func (m *meanagg) merge(s string) {
v, ok := parseFloat(s)
if !ok {
return
}
m.v.Add(m.v, v)
m.d++
}
func parseFloat(s string) (*big.Float, bool) {
v, _, err := big.ParseFloat(s, 0, 1000, big.ToNearestEven)
return v, err == nil
}
type counter int
func count(init, arg string) agg { return new(counter) }
func (c *counter) String() string { return strconv.Itoa(int(*c) + 1) }
func (c *counter) merge(string) { *c++ }
package main
import (
"math/rand"
"strings"
)
func first(s, arg string) agg { return &sbinop{s, opfirst} }
func last(s, arg string) agg { return &sbinop{s, oplast} }
func prefix(s, arg string) agg { return &sbinop{s, opprefix} }
func join(s, arg string) agg { return &sbinop{s, opjoin(arg)} }
func smin(s, arg string) agg { return &sbinop{s, opsmin} }
func smax(s, arg string) agg { return &sbinop{s, opsmax} }
type sbinop struct {
s string
f func(a, b string) string
}
func (o *sbinop) String() string { return o.s }
func (o *sbinop) merge(s string) { o.s = o.f(o.s, s) }
func opfirst(a, b string) string { return a }
func oplast(a, b string) string { return b }
func opprefix(a, b string) string {
for i := range a {
if i >= len(b) || a[i] != b[i] {
return a[:i]
}
}
return a
}
func opjoin(sep string) func(a, b string) string {
return func(a, b string) string {
return a + sep + b // TODO(kr): too slow? maybe strings.Join?
}
}
func opsmin(a, b string) string {
if strings.Compare(a, b) <= 0 {
return a
}
return b
}
func opsmax(a, b string) string {
if strings.Compare(a, b) >= 0 {
return a
}
return b
}
type sampler struct {
n int
s string
}
func sample(s, arg string) agg { return &sampler{1, s} }
func (p *sampler) String() string { return p.s }
func (p *sampler) merge(s string) {
p.n++
if rand.Intn(p.n) == 0 {
p.s = s
}
}
type constant string
func constf(init, arg string) agg { return constant(arg) }
func (c constant) String() string { return string(c) }
func (c constant) merge(string) {}
module "github.com/kr/text"
require "github.com/kr/pty" v1.1.1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment