#!/usr/bin/python3 # -*- coding: utf-8 -*- """ At what level would you like to evolve code? What percentage of the following would be valid code, if generated randomly? This is binary code: 0101010011100010010110101010110 This is HEX: 50f0 4954 c0c7 12a0 0040 c748 30c1 4012 This is assembly: mov rsi,rax mov rdi,rdx mov esi,0x401040 mov rdi,rax mov eax,0x0 This is bytecode: TODO This is a formula: Y = (20 + 45) * (56 - X) This is it's infix notation: https://en.wikipedia.org/wiki/Infix_notation Y = ( ( 20 + 45 ) * ( 56 - X ) ) This is it's prefix notation: https://en.wikipedia.org/wiki/Polish_notation Y = ( * ( + 20 45 ) ( - 56 X ) ) This is it's postfix notation: https://en.wikipedia.org/wiki/Reverse_Polish_notation Y = ( ( 20 45 + ) ( 56 X - ) * ) This is it's tree: * / \ + - / \ / \ 20 45 56 X This is it's python code: """ X = 4 Y = (20 + 45) * (56 - X) def my_formula(X: int) -> int: return (20 + 45) * (56 - X) Y = my_formula(X=4) """ We're not using any user-defined classes, and so are doing functional python programming above. This is the code in clojure: Y = ( * ( + 20 45 ) ( - 56 X ) ) (defn my_formula [X] ( * ( + 20 45 ) ( - 56 X ) ) ) (def Y (my_formula 4)) How do we represent code as a genome? What data structure might we use? Does the language matter? Do some languages make the representation easier? Recall n-queens: we could have chosen: a matrix or vector, integer or permutation, and all would have worked, though some better than others. This is the same code in the Push language: TODO """ from typing import TypedDict from typing import Optional class Node: pass class InternalNode(Node): def __init__( self, data: str, left: Optional[Node] = None, right: Optional[Node] = None ) -> None: self.data = data self.left = left self.right = right class LeafNode(Node): def __init__(self, data: str) -> None: self.data = data root = InternalNode("*", left=None, right=None) root.left = InternalNode("+", left=None, right=None) root.left.left = LeafNode("20") root.left.right = LeafNode("45") root.right = InternalNode("-", left=None, right=None) root.right.left = LeafNode("56") root.right.right = LeafNode("X") def print_tree(root: Node, indent: str) -> None: if isinstance(root, LeafNode): print(indent + root.data) return print_tree(root=root.right, indent=indent + " ") print(indent, root.data) print_tree(root=root.left, indent=indent + " ") print_tree(root=root, indent="") print() # This also works: root = InternalNode( data="*", left=InternalNode(data="+", left=LeafNode("20"), right=LeafNode("45")), right=InternalNode(data="-", left=LeafNode("56"), right=LeafNode("X")), ) print_tree(root=root, indent="") print() def parse_expression(source_code: str) -> Node: source_code = source_code.replace("(", "") source_code = source_code.replace(")", "") code_arr = source_code.split() return parse_rec(code_arr) def parse_rec(code: list[str]) -> Node: """Assumes code is prefix notation lisp with space delimeters.""" if code[0] in "+-/*": temp = InternalNode(data=code.pop(0)) temp.left = parse_rec(code) temp.right = parse_rec(code) return temp else: return LeafNode(code.pop(0)) clojure_code = "( * ( + 20 45 ) ( - 56 X ) )" root = parse_expression(clojure_code) print_tree(root=root, indent="") def parse_tree_print(root: Node) -> None: if isinstance(root, InternalNode): print(f"( {root.data} ", end="") parse_tree_print(root.left) parse_tree_print(root.right) print(") ", end="") else: # for the case of literal programs... e.g., `4` print(f"{root.data} ", end="") parse_tree_print(root) print() def parse_tree_return(root: Node) -> str: if isinstance(root, InternalNode): return f"( {root.data} {parse_tree_return(root.left)} {parse_tree_return(root.right)} )" else: # for the case of literal programs... e.g., `4` return root.data stringified = parse_tree_return(root) print(stringified) # Running clojure/lisp code in python... # This is hacky in python, # and could have been done other hacky ways. import subprocess returnobject = subprocess.run(["clojure", "-e", "(+ 3 4)"], capture_output=True) result = int(returnobject.stdout.decode().strip()) print(result) """ Here's another example: Celcius to Farenheight infix: F = ( ( C * ( 9 / 5 ) ) + 32 ) prefix: F = ( + ( * C ( / 9 5 ) ) 32 ) postfix: F = ( ( C ( 9 5 / ) * ) 32 + ) How might we evolve this formula from data itself? What is our fitness function? How is this similar to homework autograding? Tests? Inputs Outputs? Data formats? Recall the PSB? https://cs.hamilton.edu/~thelmuth/PSB2/PSB2.html """ import psb2 (train_data, test_data) = psb2.fetch_examples( "path/to/PSB2/datasets/", "bowling", 200, 2000 ) print(train_data) print(test_data) class Individual(TypedDict): genome: Node fitness: int def initialize_individual(genome: str, fitness: int) -> Individual: """ Purpose: Create one individual Parameters: genome as string, fitness as integer (higher better) User Input: no Prints: no Returns: One Individual, as a dict[str, int] Modifies: Nothing Calls: Basic python only Example doctest: """ return {"genome": parse_expression(genome), "fitness": fitness} def evaluate_individual(individual: Individual, io_pairs: list[dict[str, str]]) -> None: """ Purpose: Computes and modifies the fitness for one individual Parameters: One Individual User Input: no Prints: no Returns: None Modifies: The individual (mutable object) Calls: Basic python only Notes: train/test format is like PSB2 Example doctest: """ fitness = 0 C = list(range(-10, 110, 10)) F = [(c * (9 / 5)) + 32 for c in C] train = [{"input1": c, "output1": f} for c, f in zip(C, F)] errors: list[float] = [] for sub_eval in train: eval_string = parse_tree_return(individual["genome"]).replace( "C", str(sub_eval["input1"]) ) eval_string = "( float " + eval_string + ")" returnobject = subprocess.run( ["clojure", "-e", eval_string], capture_output=True ) result = float(returnobject.stdout.decode().strip().replace("N", "")) errors.append(abs(sub_eval["output1"] - result)) fitness = sum(errors) print(fitness) individual["fitness"] = fitness # correct ind1 = initialize_individual("( + ( * C ( / 9 5 ) ) 32 )", 0) # bad ind2 = initialize_individual("( - ( * C ( / 9 5 ) ) 32 )", 0) ind3 = initialize_individual("( + ( * C ( / 5 9 ) ) 32 )", 0) # close ind4 = initialize_individual("( + ( * C ( / 9 5 ) ) 33 )", 0) # really bad ind5 = initialize_individual("( + C 32 )", 0) evaluate_individual(ind1) evaluate_individual(ind2) evaluate_individual(ind3) evaluate_individual(ind4) evaluate_individual(ind5)