#lang scheme #| tinygp.ss: A "work-sortof-alike" of Riccardo Poli's "tiny_gp" genetic programming system for PLT Scheme. Lee Spector (lspector@hampshire.edu), January 2010 This is a rough and approximate implementation of Riccardo Poli's tiny_gp system, which is available from http://cswww.essex.ac.uk/staff/rpoli/TinyGP/, for PLT Scheme, which is available from http://plt-scheme.org. Riccardo's tiny_gp was first written in C, but the present code was based (roughly) on his subsequent Java version. It is a "work-alike" insofar as it takes input files of the same format, uses essentially the same genetic programming algorithms, and produces approximately the same output. But it is not a direct translation and in some cases different data structures are used, different output messages are provided, etc. For example, programs here are represented as Scheme symbolic expressions, not using a flattened representation, and are "pretty printed" in standard prefix notation. This was written for pedagogical purposes and is intended to be more clear to a reader than Riccardo's Java version, but little attention was paid here to efficiency or to the "tiny" goals that motivated Riccardo's work. Nonetheless some of Riccardo's design choices, for example some uses of globals, which were made by him for reasons that are not really relevant here (for example to reduce the size of the executable) were retained just as result of the translation process but don't really make much sense otherwise. TO RUN In the DrScheme IDE application that comes with PLT Scheme (http://plt-scheme.org): - Open this file in DrScheme. - Edit the call to "main" at the bottom to have the correct input file name. - If desired, add second argument to main, a seed for the random number generator. - Hit the "Run" button or select Run from the Scheme menu. In the mzscheme command-line program that comes with PLT Scheme (http://plt-scheme.org): - Edit the call to "main" at the bottom of this file to have the correct input file name. - If desired, add second argument to main, a seed for the random number generator. - Ensure that the directory containing mzscheme is in your path (or set up an alias, etc.). - Type the following to your command line: mzscheme tinygp.ss In other Schemes most of this will probably work -- although I haven't tried it -- but you'll probably have to make a few changes including removing the #lang line, getting rid of the namespace declaration, and changing the code for printing parameters (which here uses eval in conjunction with the PLT namespace declaration). NOTE This is indended to be used for demonstration purposes only. It is not an industrial strength genetic programming system and it has many severe limitations, the most obvious of which is that all functions in function set are assumed to be binary. |# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; parameters (define min-random -100.0) ;; actual value will be read from file (define max-random 100.0) ;; actual value will be read from file (define population-size 1000) ;; Poli default is 100000 (define max-depth 5) (define max-generations 100) (define tournament-size 2) (define mutation-per-node-probability 0.05) (define crossover-probability 0.9) (define function-set '(+ - * %)) ;; note: all functions are assumed to be binary (define tinygp-parameters ;; this just makes it simpler to print all of the parameter values '(min-random max-random population-size max-depth max-generations tournament-size mutation-per-node-probability crossover-probability)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; other globals (define population-fitnesses #f) ;; will be a vector later (define population #f) ;; will be a vector later (define program #f) ;; will be a list (define constants #f) ;; will be a list later (define inputs #f) ;; will be a vector later (define input-symbols #f) ;; will be a list later (define num-inputs 0) ;; num inputs, will be read from file (define num-fitness-cases 0) ;; num fitness cases, will be read from file (define num-constants 0) ;; number of available constants, from file (define fitness-cases #f) ;; will be a 2D array of fitness cases, D1=case, D2=input,input,...,output (define best-fitness -1.0e34) (define-namespace-anchor tinygp) (define tinygp-namespace (namespace-anchor->namespace tinygp)) ;; for eval, used only to print parameters ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; general purpose utilities (define random-element (lambda (lst) (list-ref lst (random (length lst))))) (define position (lambda (thing lst (pos 0)) (if (null? lst) #f (if (equal? thing (first lst)) pos (position thing (rest lst) (+ pos 1)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; tinygp-specific code (define process-input-file (lambda (fname) (with-input-from-file fname (lambda () (set! num-inputs (read)) (set! num-constants (read)) (set! min-random (read)) (set! max-random (read)) (set! num-fitness-cases (read)) (set! fitness-cases (build-vector num-fitness-cases (lambda (i) (make-vector (+ num-inputs 1))))) (for ((i (in-range 0 num-fitness-cases))) (for ((j (in-range 0 (+ num-inputs 1)))) (vector-set! (vector-ref fitness-cases i) j (read)))) (set! constants (build-list num-constants (lambda (i) (+ min-random (* (- max-random min-random) (random)))))) (set! inputs (make-vector num-inputs 0.0)) (set! input-symbols (for/list ((i (in-range num-inputs))) (string->symbol (format "i~A" i)))))))) ;(process-input-file "sin-data.txt") ;fitness-cases (define random-program (lambda (max-d) (if (or (<= max-d 0) (< (random) 0.5)) (random-element (append constants input-symbols)) (let ((f (random-element function-set))) ;; assume only binary functions (list f (random-program (- max-d 1)) (random-program (- max-d 1))))))) (define % ;; protected division (lambda (num den) (if (<= (abs den) 0.001) num (/ num den)))) (define fast-eval (lambda (pgm) (if (pair? pgm) ;; assume only binary functions ((case (first pgm) ;; and only these ((+) +) ((-) -) ((*) *) ((%) %)) (fast-eval (second pgm)) (fast-eval (third pgm))) (let ((input-symbol-index (position pgm input-symbols))) (if input-symbol-index (vector-ref inputs input-symbol-index) pgm))))) (define fitness (lambda (pgm) (let ((total 0.0)) (for ((case (in-range num-fitness-cases))) (for ((i (in-range 0 num-inputs))) (vector-set! inputs i (vector-ref (vector-ref fitness-cases case) i))) (set! total (+ total (abs (- (fast-eval pgm) (vector-ref (vector-ref fitness-cases case) num-inputs)))))) (- total)))) (define print-stats (lambda (gen) (let ((best-index 0) (best-length 0) (avg-fitness 0) (avg-length 0)) (set! best-fitness -1e34) (for ((i (in-range 0 population-size))) (set! avg-fitness (+ avg-fitness (vector-ref population-fitnesses i))) (set! avg-length (+ avg-length (length (flatten (vector-ref population i))))) (when (> (vector-ref population-fitnesses i) best-fitness) (set! best-index i) (set! best-fitness (vector-ref population-fitnesses i)) (set! best-length (length (flatten (vector-ref population i)))))) (set! avg-fitness (/ avg-fitness population-size 1.0)) (set! avg-length (/ avg-length population-size 1.0)) (printf "\n\nGeneration= ~A, Avg Fitness= ~A, Avg Size= ~A, Best Fitness= ~A, Best Size=~A" gen avg-fitness avg-length best-fitness best-length) (printf "\nBest Individual:\n") (pretty-print (vector-ref population best-index))))) (define tournament ;; returns an index (lambda () (let ((best-so-far 0) (best-fitness-so-far -1.0e34)) (for ((i (in-range 0 tournament-size))) (let ((competitor (random population-size))) (when (> (vector-ref population-fitnesses competitor) best-fitness-so-far) (set! best-so-far competitor) (set! best-fitness-so-far (vector-ref population-fitnesses competitor))))) best-so-far))) (define negative-tournament ;; returns an index (lambda () (let ((worst-so-far 0) (worst-fitness-so-far 1.0e34)) (for ((i (in-range 0 tournament-size))) (let ((competitor (random population-size))) (when (< (vector-ref population-fitnesses competitor) worst-fitness-so-far) (set! worst-so-far competitor) (set! worst-fitness-so-far (vector-ref population-fitnesses competitor))))) worst-so-far))) (define count-points (lambda (pgm) (if (pair? pgm) (+ 1 (apply + (map count-points (rest pgm)))) 1))) ;; (count-points '(+ (* 4 (- x 2)) 1)) (define code-at-point ;; assumes index is valid (lambda (pgm index) (if (zero? index) pgm (let ((points-in-first-arg (count-points (second pgm)))) (if (<= index points-in-first-arg) (code-at-point (second pgm) (- index 1)) (code-at-point (cons (first pgm) (rest (rest pgm))) (- index points-in-first-arg))))))) ;; (for ((i (in-range 7))) (printf "\n~A" (code-at-point '(+ (* 4 (- x 2)) 1) i))) (define insert-code-at-point ;; assumes index is valid AND that all functions are binary! (lambda (pgm index new-part) (if (zero? index) new-part (let ((points-in-first-arg (count-points (second pgm)))) (if (<= index points-in-first-arg) (cons (first pgm) (cons (insert-code-at-point (second pgm) (- index 1) new-part) (rest (rest pgm)))) (list (first pgm) (second pgm) (insert-code-at-point (third pgm) (- index points-in-first-arg 1) new-part))))))) ;(for ((i (in-range 7))) ; (printf "\n~A" ; (insert-code-at-point '(+ (* 4 (- x 2)) 1) i 'XXX))) (define crossover ;; takes 2 programs and returns a program (lambda (p1 p2) (insert-code-at-point p1 (random (count-points p1)) (code-at-point p2 (random (count-points p2)))))) (define mutate (lambda (p) (let ((len (count-points p))) (for ((i (in-range 0 len))) (when (< (random) mutation-per-node-probability) (set! p (let ((current-code-at-mutation-point (code-at-point p i))) (insert-code-at-point p i (if (pair? current-code-at-mutation-point) (cons (random-element function-set) (rest current-code-at-mutation-point)) (random-element (append constants input-symbols))))))))) p)) ;; (display (mutate '(+ (+ (+ (+ (+ (+ 0 0) (+ 0 0)) (+ 0 0)) (+ 0 0)) (+ 0 0)) (+ 0 0)))) (define evolve (lambda () (print-stats 0) (for ((gen (in-range 1 (+ max-generations 1)))) (cond ((> best-fitness -1e-5) (printf "\nPROBLEM SOLVED") (exit)) (else (for ((i (in-range 0 population-size))) (let* ((new-program (if (< (random) crossover-probability) (let ((parent1 (tournament)) (parent2 (tournament))) (crossover (vector-ref population parent1) (vector-ref population parent2))) (let ((parent (tournament))) (mutate (vector-ref population parent))))) (new-fitness (fitness new-program)) (offspring (negative-tournament))) (vector-set! population offspring new-program) (vector-set! population-fitnesses offspring new-fitness))))) (print-stats gen)) (printf "\nPROBLEM *NOT* SOLVED\n") (exit))) (define print-parameters (lambda () (printf "\n-- TINY GP (Scheme version) --") (for ((p tinygp-parameters)) (printf "\n~A=~A" p (eval p tinygp-namespace))) (printf "\n----------------------------------\n"))) (define main (lambda ((fname "problem.dat") (seed -1)) (when (>= seed 0) (random-seed seed)) (process-input-file fname) ;; create initial population (set! population (make-vector population-size)) (set! population-fitnesses (make-vector population-size)) (for ((i (in-range population-size))) (vector-set! population i (random-program max-depth)) (vector-set! population-fitnesses i (fitness (vector-ref population i)))) ;; print parameters (print-parameters) ;; evolve (evolve) )) (main "sin-data.txt")