#!/bin/sh

### Make perceptron data:
### Two classes, each gaussian
###
### Files written to the current directory:
###   tmp-c1x tmp-c1y tmp-c2x tmp-c2y   one coordinate column per file
###   tmp-c1  tmp-c2                    per-class rows: x, y, target
###   training-set                      both classes, lines shuffled

## -e  stop at the first failing command, so a broken generator never
##     leaves a silently empty or partial training set behind
## -u  treat a misspelled parameter name as an error
## -f  no pathname expansion (this used to be "-f" on the #! line,
##     where it is lost when the script is started as "sh <script>")
set -euf

## print a diagnostic on stderr and abort
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

## gaussian_column MEAN STDEV ROWS FILE
## Write ROWS samples from the external randg generator into FILE.
## NOTE(review): randg is assumed to print one sample per line (the
## paste step below relies on that) -- confirm against its docs.
gaussian_column() {
  randg -mean "$1" -stdev "$2" -rows "$3" > "$4"
}

## label_class XFILE YFILE TARGET
## Join the two coordinate columns side by side and append TARGET
## (the class id) as the last field of every row.
label_class() {
  paste "$1" "$2" | awk -v target="$3" '{print $0, target}'
}

## Copy stdin to stdout with the lines in random order.
## Prefers rl, then shuf; otherwise decorates each line with a random
## key, sorts on the key and strips it again.
shuffle_lines() {
  if command -v rl >/dev/null 2>&1; then
    rl
  elif command -v shuf >/dev/null 2>&1; then
    shuf
  else
    # srand() seeds from the clock, so two runs within the same
    # second produce the same order.  LC_ALL=C keeps "." as the
    # decimal point for both awk and sort.
    LC_ALL=C awk 'BEGIN { srand() } { printf "%.17f\t%s\n", rand(), $0 }' \
      | LC_ALL=C sort -n \
      | cut -f2-
  fi
}

# Check before any redirection runs: otherwise a missing generator
# would still create empty tmp-* files.
command -v randg >/dev/null 2>&1 || die "randg not found in PATH"

## number of patterns in each class
nc1=100
nc2=100

## class means
c1xm=1
c1ym=1
c2xm=-1
c2ym=-1

## class standard deviations
c1xs=2
c1ys=0.5
c2xs=2
c2ys=0.5

## generate data
gaussian_column "$c1xm" "$c1xs" "$nc1" tmp-c1x
gaussian_column "$c1ym" "$c1ys" "$nc1" tmp-c1y
gaussian_column "$c2xm" "$c2xs" "$nc2" tmp-c2x
gaussian_column "$c2ym" "$c2ys" "$nc2" tmp-c2y

## agglomerate x/y coordinates & target outputs (class id)
label_class tmp-c1x tmp-c1y  1 > tmp-c1
label_class tmp-c2x tmp-c2y -1 > tmp-c2

## concatenate & shuffle to generate training set
cat tmp-c1 tmp-c2 | shuffle_lines > training-set

## Notes

# The rl (randomize lines) utility is in the Debian package
# randomize-lines, http://packages.debian.org/randomize-lines. If it
# is not available, you can easily do the same thing by prefixing each
# line with a random number, sorting on that number, and then
# stripping the prefix off again.

# $ gnuplot
# gnuplot> p "tmp-c1" w points 2, "tmp-c2" w points 3

# $ ./perceptron1 < training-set | awk '{print $1}' | uniq --count