#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
At what level would you like to evolve code?
What percentage of the following would be valid code,
if generated randomly?

This is binary code:
0101010011100010010110101010110

This is HEX:
50f0 4954 c0c7 12a0 0040 c748 30c1 4012

This is assembly:
mov    rsi,rax
mov    rdi,rdx
mov    esi,0x401040
mov    rdi,rax
mov    eax,0x0

This is bytecode:
TODO

This is a formula:
Y = (20 + 45) * (56 - X)

This is it's infix notation:
https://en.wikipedia.org/wiki/Infix_notation
Y = ( ( 20 + 45 ) * ( 56 - X ) )

This is it's prefix notation:
https://en.wikipedia.org/wiki/Polish_notation
Y = ( * ( + 20 45 ) ( - 56 X ) )

This is it's postfix notation:
https://en.wikipedia.org/wiki/Reverse_Polish_notation
Y = ( ( 20 45 + ) ( 56 X - ) * )

This is it's tree:
       *
      /  \
    +     -
   / \   / \
  20 45 56  X

This is it's python code:
"""
X = 4
Y = (20 + 45) * (56 - X)


def my_formula(X: int) -> int:
    return (20 + 45) * (56 - X)


Y = my_formula(X=4)

"""
We're not using any user-defined classes,
and so are doing functional python programming above.

This is the code in clojure:
Y = ( * ( + 20 45 ) ( - 56 X ) )

(defn my_formula [X] ( * ( + 20 45 ) ( - 56 X ) ) )
(def Y (my_formula 4))

How do we represent code as a genome?
What data structure might we use?
Does the language matter?
Do some languages make the representation easier?
Recall n-queens: we could have chosen:
a matrix or vector,
integer or permutation,
and all would have worked,
though some better than others.

This is the same code in the Push language:
TODO
"""

from typing import TypedDict
from typing import Optional


class Node:
    pass


class InternalNode(Node):
    def __init__(
        self, data: str, left: Optional[Node] = None, right: Optional[Node] = None
    ) -> None:
        self.data = data
        self.left = left
        self.right = right


class LeafNode(Node):
    def __init__(self, data: str) -> None:
        self.data = data


root = InternalNode("*", left=None, right=None)
root.left = InternalNode("+", left=None, right=None)
root.left.left = LeafNode("20")
root.left.right = LeafNode("45")
root.right = InternalNode("-", left=None, right=None)
root.right.left = LeafNode("56")
root.right.right = LeafNode("X")


def print_tree(root: Node, indent: str) -> None:
    if isinstance(root, LeafNode):
        print(indent + root.data)
        return
    print_tree(root=root.right, indent=indent + "    ")
    print(indent, root.data)
    print_tree(root=root.left, indent=indent + "    ")


print_tree(root=root, indent="")
print()

# This also works:
root = InternalNode(
    data="*",
    left=InternalNode(data="+", left=LeafNode("20"), right=LeafNode("45")),
    right=InternalNode(data="-", left=LeafNode("56"), right=LeafNode("X")),
)
print_tree(root=root, indent="")
print()


def parse_expression(source_code: str) -> Node:
    source_code = source_code.replace("(", "")
    source_code = source_code.replace(")", "")
    code_arr = source_code.split()
    return parse_rec(code_arr)


def parse_rec(code: list[str]) -> Node:
    """Assumes code is prefix notation lisp with space delimeters."""
    if code[0] in "+-/*":
        temp = InternalNode(data=code.pop(0))
        temp.left = parse_rec(code)
        temp.right = parse_rec(code)
        return temp
    else:
        return LeafNode(code.pop(0))


clojure_code = "( * ( + 20 45 ) ( - 56 X ) )"
root = parse_expression(clojure_code)
print_tree(root=root, indent="")


def parse_tree_print(root: Node) -> None:
    if isinstance(root, InternalNode):
        print(f"( {root.data} ", end="")
        parse_tree_print(root.left)
        parse_tree_print(root.right)
        print(") ", end="")
    else:
        # for the case of literal programs... e.g., `4`
        print(f"{root.data} ", end="")


parse_tree_print(root)
print()


def parse_tree_return(root: Node) -> str:
    if isinstance(root, InternalNode):
        return f"( {root.data} {parse_tree_return(root.left)} {parse_tree_return(root.right)} )"
    else:
        # for the case of literal programs... e.g., `4`
        return root.data


stringified = parse_tree_return(root)
print(stringified)


# Running clojure/lisp code in python...
# This is hacky in python,
# and could have been done other hacky ways.
import subprocess

returnobject = subprocess.run(["clojure", "-e", "(+ 3 4)"], capture_output=True)
result = int(returnobject.stdout.decode().strip())
print(result)

"""
Here's another example:
Celcius to Farenheight

infix:
F = ( ( C * ( 9 / 5 ) ) + 32 )

prefix:
F = ( + ( * C ( / 9 5 ) ) 32 )

postfix:
F = ( ( C ( 9 5 / ) * ) 32 + )

How might we evolve this formula from data itself?
What is our fitness function?
How is this similar to homework autograding?
Tests?
Inputs
Outputs?
Data formats?
Recall the PSB?
https://cs.hamilton.edu/~thelmuth/PSB2/PSB2.html
"""
import psb2

(train_data, test_data) = psb2.fetch_examples(
    "path/to/PSB2/datasets/", "bowling", 200, 2000
)
print(train_data)
print(test_data)


class Individual(TypedDict):
    genome: Node
    fitness: int


def initialize_individual(genome: str, fitness: int) -> Individual:
    """
    Purpose:        Create one individual
    Parameters:     genome as string, fitness as integer (higher better)
    User Input:     no
    Prints:         no
    Returns:        One Individual, as a dict[str, int]
    Modifies:       Nothing
    Calls:          Basic python only
    Example doctest:
    """
    return {"genome": parse_expression(genome), "fitness": fitness}


def evaluate_individual(individual: Individual, io_pairs: list[dict[str, str]]) -> None:
    """
    Purpose:        Computes and modifies the fitness for one individual
    Parameters:     One Individual
    User Input:     no
    Prints:         no
    Returns:        None
    Modifies:       The individual (mutable object)
    Calls:          Basic python only
    Notes:          train/test format is like PSB2
    Example doctest:
    """
    fitness = 0
    C = list(range(-10, 110, 10))
    F = [(c * (9 / 5)) + 32 for c in C]
    train = [{"input1": c, "output1": f} for c, f in zip(C, F)]
    errors: list[float] = []
    for sub_eval in train:
        eval_string = parse_tree_return(individual["genome"]).replace(
            "C", str(sub_eval["input1"])
        )
        eval_string = "( float " + eval_string + ")"
        returnobject = subprocess.run(
            ["clojure", "-e", eval_string], capture_output=True
        )
        result = float(returnobject.stdout.decode().strip().replace("N", ""))
        errors.append(abs(sub_eval["output1"] - result))
    fitness = sum(errors)
    print(fitness)
    individual["fitness"] = fitness


# correct
ind1 = initialize_individual("( + ( * C ( / 9 5 ) ) 32 )", 0)
# bad
ind2 = initialize_individual("( - ( * C ( / 9 5 ) ) 32 )", 0)
ind3 = initialize_individual("( + ( * C ( / 5 9 ) ) 32 )", 0)
# close
ind4 = initialize_individual("( + ( * C ( / 9 5 ) ) 33 )", 0)
# really bad
ind5 = initialize_individual("( + C 32 )", 0)

evaluate_individual(ind1)
evaluate_individual(ind2)
evaluate_individual(ind3)
evaluate_individual(ind4)
evaluate_individual(ind5)