;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; lgp.lisp ;; a Linear Genetic Programming system ;; c) Lee Spector, 1999 ;; lspector@hampshire.edu ;; ;; version 1.19990329 (n.yyyymmdd) #| This is a linear-program-based steady-state genetic programming system in Common Lisp. It was written with several goals in mind, among them: - to support work on the evolution of quantum algorithms - to support introductory instruction in genetic algorithms/programming Features: - small, simple, and clear (I hope) - linear programs - steady state, after a fashion - lexicographic fitness - easy to add a stack for stack-based genetic programming - easy to add "local search" operators - easy to immigration/emigration for clustered computing - easy to use interactive fitness evaluation, e.g. for evolving artworks, by making REPLACEMENT-TOURNAMENT interactive To use for your own application: - load random.cl - load this file (lgp.lisp) - define any application-specific variables and utilities - provide an appropriate value for *gp-params* - re-define REPLACEMENT-TOURNAMENT (which includes fitness evaluation) - evaluate (evolve) to run the system Miscellaneous notes: - Individuals are represented as lists of: (fitness-list program) - Fitness evaluation happens only in REPLACEMENT-TOURNAMENT, which must be redefined by the user for each application. - Fitness lists are lists of numbers, with smaller always indicating better. Fitness list F1 is "better" than fitness list F2 if its first number is lower than that of F2. If the first numbers are equivalent then the comparison moves on to the second elements, etc. The number of elements in fitness lists should be the same throughout an application. - Selection-tournament considers only pre-computed fitness values, with un-evaluated individuals always winning. - The MINIMIZATION and PAIR-MINIMIZATION genetic operators require the definition of a fitness function. You can use such a function both in your version of REPLACEMENT-TOURNAMENT and in the definition of specialized minimization operators that you can include in *genetic-operators*. - Program size limits are enforced after the execution of the genetic operators, so if if you write a new genetic operator that might increase the length of a program then it should not presume to provide a fitness value (it'll be computed later in replacement-tournament). - This uses Chris McConnell's random.cl random number generator to allow for replication of experiments based on integer seed values. To ensure replicability do not use the built-in Common Lisp RANDOM function -- use only RANDINT (defined below) or other functions from random.cl. Random.cl can be obtained from the CMU Lisp Repository, at http://www.mit.edu/afs/cs.cmu.edu/project/ ai-repository/ai/lang/lisp/code/math/random/ |# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; globals and parameters (defvar *instruction-pointer* 0 "The index of the currently executing instruction. Can be manipulated to implement control structures.") (defvar *population* nil "An array containing all individuals in the current population.") (defvar *gp-params* nil "A list of (param value) which is used to configure the genetic programming system at the start of a run. See the demo at the bottom of lgp.lisp for an example of the use of *gp-params*.") (defparameter *lgp-output-path* "lgp-output" "The path (full directory/filename specification) for lgp output. Lgp sends output both to standard output (usually the terminal/listener) and to this file. Old versions of the output file are *not* deleted -- new output is appended to the contents of the old file. Use a value of nil if you don't want any file output.") (defvar *instruction-generators* nil "A list of forms which are EVALd to produce instruction/arg lists (program elements). List an item multiple times if you want it to have a higher probability of being used. This parameter and its value should be included in *gp-params*.") (defvar *genetic-operators* nil "A list of genetic operator functions, each of which should return a new individual for possible insertion in the population. This parameter and its value should be included in *gp-params*.") (defvar *population-size* 0 "The number of individuals in the population. This parameter and its value should be included in *gp-params*.") (defvar *selection-tournament-size* 1 "The number of individuals that are considered when selecting an individual from the population (with random-individual) in genetic operators. This parameter and its value should be included in *gp-params*.") (defvar *best-individual* nil "The best individual seen so far in the current run. This parameter and its value should be included in *gp-params*. You probably want to use some arbitrary terrible individual as the initial value. Remember that individuals are lists of (fitness-list program) and that the fitness-list part of this is itself a list of fitness components (numbers), with smaller numbers being better.") (defvar *halting-fitness* nil "A fitness list that is considered good enough to stop the genetic programming system run. This parameter and its value should be included in *gp-params*.") (defvar *report-every* 1 "A positive integer that indicates how frequently the lgp system should print a progress report. Set to the number of cycles (operator applications) that you want between reports. Set to 0 if you don't want any reports before termination. This parameter and its value should be included in *gp-params*.") (defvar *max-initial-program-length* 1 "The maximum length of a newly-generated program. This parameter and its value should be included in *gp-params*.") (defvar *max-program-length* 1 "The maximum length of any lgp-evolved program. This parameter and its value should be included in *gp-params*.") (defvar *random-seeds* (list 0 0) "A list of two integers, the first of which must be 0 <= n <= 31328, and the second of which must be 0 <= n <= 30081. These are used to seed the random number generator. This parameter and its value should be included in *gp-params*.") (defvar *initialization-forms* nil "A list of forms that will be evaluated prior to the start of a genetic programming run. Use this to set up additional globals for fitness evaluation functions, etc. This parameter and its value should be included in *gp-params*.") (defvar *percent-losers-win* 0 "The percentage of times that the 'loser' will nonetheless replace the 'winner' in a replacement tournament. NOTE that you must write your version of replacement-tournament to implement this. See the demo code at the bottom of lgp.lisp for an example of how to do this. This parameter and its value should be included in *gp-params*.") (defvar *lexicographic-fitness-epsilon* 0 "The 'margin of equivalence' for fitness component comparisons. If two fitness components differ by less than this value then they will be considered equivalent. This parameter and its value should be included in *gp-params*.") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; utilities (defun randint (n) "Calls the random-integer function from random.cl to produce a random integer between 0 (inclusive) and n (exclusive)." (random::random-integer n)) (defun lgp-output (&rest format-args) "Interprets its arguments as FORMAT does (without a stream argument) and sends the output both to standard output (e.g. the listener) and, if *lgp-output-path* is non-nil, appends it to the file at that path." (apply #'format (cons t format-args)) (when *lgp-output-path* (with-open-file (out *lgp-output-path* :direction :output :if-exists :append :if-does-not-exist :create) (apply #'format (cons out format-args))))) (defun random-element (list) "Returns a random element of the given list." (nth (randint (length list)) list)) (defun initial-segment (list) "Returns a random initial segment of the given list." (butlast list (randint (1+ (length list))))) (defun tail-segment (list) "Returns a random tail segment of the given list." (nthcdr (randint (1+ (length list))) list)) (defun middle-segment (list) "Returns a random middle segment of the given list." (initial-segment (tail-segment list))) (defun report-best (cycle &optional (no-matter-what nil)) "Reports on the current best individual in the lgp run. Uses the provided cycle number and *report-every* to determine if reporting is really warranted, and reports only if these numbers indicate that a report should be made OR if the optional no-matter-what parameter is true. The best individual is printed in a format that allows for easy re-setting of *best-individual*." (labels ((report () (lgp-output "~%;; -*- Report at cycle ~A -*-" cycle) (lgp-output "~%(setq *best-individual* '~A)" *best-individual*))) (if no-matter-what (report) (unless (zerop *report-every*) (when (zerop (mod cycle *report-every*)) (report)))))) (defun set-gp-params () "Sets the lgp parameters as indicated in *gp-params*." (dolist (pair *gp-params*) (set (first pair) (second pair)))) (defun print-gp-params () "Prints a report of the current values of lgp parameters. The parameters are printed in a format that allows for easy re-setting of *gp-params*." (lgp-output "~%(setq *gp-params* '~A)~%" *gp-params*)) (defun random-individual () "Performs a selection tournament on randomly selected individuals and returns the winner. :no-computed-fitness beats any computed fitness value." (let ((candidates nil)) (dotimes (i *selection-tournament-size*) (push (aref *population* (randint *population-size*)) candidates)) (let ((unevaluated (find :no-computed-fitness candidates :key #'first))) (if unevaluated unevaluated (let ((sorted-candidates (sort candidates #'lexicographically-better-fitness :key #'first))) (first sorted-candidates)))))) (defun lexicographically-better-fitness (fitness-list1 fitness-list2) "Returns t if fitness-list1 is better than fitness-list2 with the fitness components considered lexicographically. Assumes all fitness-lists are the same length." (cond ((null fitness-list1) nil) ((<= (abs (- (first fitness-list1) (first fitness-list2))) *lexicographic-fitness-epsilon*) (lexicographically-better-fitness (rest fitness-list1) (rest fitness-list2))) ((< (first fitness-list1) (first fitness-list2)) t) (t nil))) (defun noop () "Does nothing. Used for substitutions in MINIMIZATION genetic operators." nil) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; top level and associated functions (defun evolve () "Top-level function for lgp -- call it to start an lgp run." ;; set and print the paramters as specified in *gp-params* (set-gp-params) (print-gp-params) ;; execute any initialization forms and seed the random number generator (dolist (f *initialization-forms*) (eval f)) (random::seed-state (first *random-seeds*) (second *random-seeds*)) ;; generate the initial population (generate-initial-population) ;; begin the operator-application loop (do ((cycle 0 (+ cycle 1))) ((not (lexicographically-better-fitness ;; check for halting condition *halting-fitness* (first *best-individual*))) (report-best cycle t) ;; always report before halting *Best-Individual*) ;; return the best individual ;; apply an operator (let ((new-individual (keep-within-size-limit (funcall (random-element *genetic-operators*)))) (challenge-index (randint *population-size*))) ;; possibly insert the new individual into the population (setf (aref *population* challenge-index) (replacement-tournament new-individual (aref *population* challenge-index))) (report-best cycle)))) ;; report if it's time to do so (defun execute-program (program) "Executes the given lgp program. Note that arguments are not evaluated prior to calling the functions in the program." (let* ((num-instructions (length program)) (program-array (make-array num-instructions :initial-contents program))) ;; we use an explicit instruction pointer so that control structures ;; can be implemented later (setq *instruction-pointer* 0) (loop (when (>= *instruction-pointer* num-instructions) (return)) ;; ternimate when instruction pointer out of range (let ((instruction (aref program-array *instruction-pointer*))) (apply (first instruction) (rest instruction))) (incf *instruction-pointer*)))) (defun generate-random-program () "Creates and returns a random lgp program according to *max-initial-program-length* and *max-initial-program-length*." (let ((program nil)) (dotimes (which-instruction (randint *max-initial-program-length*)) (push (eval (random-element *instruction-generators*)) program)) program)) (defun generate-initial-population () "Initializes the lgp population and fills it with new random individuals." (setq *population* (make-array *population-size*)) (dotimes (which-individual *population-size*) (setf (aref *population* which-individual) (list :no-computed-fitness (generate-random-program))))) (defun keep-within-size-limit (individual) "Returns a version individual that obeys the *max-program-length*, obtained via truncation if necessary." (let* ((program (second individual)) (program-length (length program))) (if (> program-length *max-program-length*) (list :no-computed-fitness (butlast program (- program-length *max-program-length*))) individual))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; genetic operators (defun reproduction () "Returns a random individual from the population, produced via selection tournament." (random-individual)) (defun crossover () "Returns a new individual constructed from a random initial segment of one existing individual appended to a random tail segment from a possibly different existing individual." (let ((parent1 (random-individual)) (parent2 (random-individual))) (list :no-computed-fitness (append (initial-segment (second parent1)) (tail-segment (second parent2)))))) (defun mutation () "Returns a new individual constructed by substituting one randomly generated instruction for one instruction in an existing individual." (let* ((parent-program (copy-tree (second (random-individual)))) (instruction-index (randint (length parent-program))) (new-instruction (eval (random-element *instruction-generators*)))) (unless (null parent-program) (setf (nth instruction-index parent-program) new-instruction)) (list :no-computed-fitness parent-program))) (defun insertion () "Returns a new individual constructed by sandwiching a random middle segment of one individual between random initial and tail segments of a possibly different individual. Note that the initial and tail segments might overlap or fail to include all instructions from the 'outer' individual." (let* ((program1 (second (random-individual))) (prog1-length (length program1)) (program2 (second (random-individual)))) (list :no-computed-fitness (let ((split-point (randint (+ 1 prog1-length)))) (append (butlast program1 (- prog1-length split-point)) (middle-segment program2) (nthcdr split-point program1)))))) (defun mutant-insertion () "Returns a new individual constructed by inserting a new random program within the program of an existing individual. Unlike the case with the INSERTION operator the 'outer' individual will be included in its entirety, with no possibility of duplicated parts." (let* ((program (second (random-individual))) (program-length (length program))) (list :no-computed-fitness (let ((split-point (randint (+ 1 (length program))))) (append (butlast program (- program-length split-point)) (generate-random-program) (nthcdr split-point program)))))) (defun deletion () "Returns a new individual constructed by removing some middle segment of an existing individual." (let* ((parent (random-individual)) (parent-prog (second parent)) (first-part (initial-segment parent-prog)) (second-part (tail-segment (nthcdr (length first-part) parent-prog)))) (list :no-computed-fitness (append first-part second-part)))) (defun minimization (fitness-function) "Returns a new individual constructed by removing gates from an existing individual. Each gate is examined for removal, starting from the beginning, and the removal is accepted if it results in an equal or higher lexicographic fitness. NOTE THAT THIS IS VERY EXPENSIVE! Because this operator takes an argument, to use it you must define a more specific function like (defun my-min () (minimization my-fit-fn)) to include in *genetic-operators*." (let* ((individual (random-individual)) (program (second individual)) (program-length (length program)) (fitness (funcall fitness-function program))) (dotimes (instr-index program-length) (let ((new-program (copy-tree program))) (setf (nth instr-index new-program) (list 'noop)) (let ((new-fitness (funcall fitness-function new-program))) (unless (lexicographically-better-fitness fitness new-fitness) (setq program new-program fitness new-fitness))))) (list :no-computed-fitness (remove (list 'noop) program :test #'equalp)))) (defun pair-minimization (fitness-function) "Like minimization but examines every *pair* of instructions for possible removal. This is even more expensive -- n^2 in the number of instructions in the chosen program." (let* ((individual (random-individual)) (program (second individual)) (program-length (length program)) (fitness (funcall fitness-function program))) (dotimes (instr-index-1 program-length) (dotimes (instr-index-2 program-length) (when (not (= instr-index-1 instr-index-2)) (let ((new-program (copy-tree program))) (setf (nth instr-index-1 new-program) (list 'noop)) (setf (nth instr-index-2 new-program) (list 'noop)) (let ((new-fitness (funcall fitness-function new-program))) (unless (lexicographically-better-fitness fitness new-fitness) (setq program new-program fitness new-fitness))))))) (list :no-computed-fitness (remove (list 'noop) program :test #'equalp)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; demo #| This is a fairly silly demo, included just to demonstrate lgp's features. The basic idea derives from the game of blackjack, in which one is dealt cards one at a time in an attempt to get a total of 21. In real blackjack you want to stay under 21, so 20 is pretty good but 22 is useless. Here we just want to get exactly the target value, and we'll use a higher value just to make it a little harder. And in the real game the cards are dealt one at a time and the player's task is to bet and decide when to stop requesting new cards (which are requested by saying "hit me"). Here, we are instead workin on the rather different task of trying to evolve sequences of cards that add up to the target value, and there's no dealing or stopping involved. So the analogy is a bit twisted. |# ;; First we define some variables and utilites for the demo application. (defvar *hand* 0 "Holds the total of the cards that have been dealt so far.") (defparameter *desired-total* 121 "The number that we want *hand* to contain after the cards are dealt.") (defun hit-me (n) "Adds n to *hand*." (incf *hand* n)) (defun deal-a-card () "Returns a form which, when evaluated, deals a random card." (list 'hit-me (+ 1 (randint 13)))) (defun demo-fitness (program) "Evaluates the fitness of a program for the demo. The first fitness component is the 'error' in the total value, and the second is the length of the program." (list (progn (setq *hand* 0) (execute-program program) (abs (- *hand* *desired-total*))) (length program))) (defun demo-fitness-computed-if-necessary (individual) "If the fitness of the individual has already been computed then this just returns it. Otherwise demo-fitness is called on the individual's program and the result is returned." (if (eq (first individual) :no-computed-fitness) (demo-fitness (second individual)) (first individual))) (defun demo-minimization () "A version of minimization for the demo." (minimization #'demo-fitness)) (defun demo-pair-minimization () "A version of pair-minimization for the demo." (pair-minimization #'demo-fitness)) ;; Now we give *gp-params* an appropriate value for the demo: (setq *gp-params* '((*instruction-generators* ((deal-a-card))) (*genetic-operators* (reproduction crossover mutation insertion mutant-insertion deletion demo-minimization demo-pair-minimization)) (*population-size* 1000) (*selection-tournament-size* 5) (*best-individual* ((999999999 999999999) nil)) (*halting-fitness* (0 13)) (*report-every* 50) (*max-initial-program-length* 10) (*max-program-length* 50) (*random-seeds* (0 0)) (*lexicographic-fitness-epsilon* 0) (*initialization-forms* nil) (*percent-losers-win* 10) )) ;; Here's our application-specific definition for replacement-tournament; (defun replacement-tournament (individual1 individual2) "Returns winner. Sets *best-individual* if appropriate." (let* ((program1 (second individual1)) (program2 (second individual2)) (fitness-list1 (demo-fitness-computed-if-necessary individual1)) (fitness-list2 (demo-fitness-computed-if-necessary individual2)) winner loser (new-best nil)) (if (lexicographically-better-fitness fitness-list1 fitness-list2) (progn (setq winner (list fitness-list1 program1)) (setq loser (list fitness-list2 program2))) (progn (setq winner (list fitness-list2 program2)) (setq loser (list fitness-list1 program1)))) (when (lexicographically-better-fitness (first winner) (first *best-individual*)) (setq new-best t) (setq *best-individual* winner)) ;; return value (cond (new-best winner) ;; always return winner if it's a new best ((< (randint 100) *percent-losers-win*) loser) (t winner)))) ;; Evaluate (evolve) to run the demo.