What is context-free grammar?
A grammar is a set of production rules for a language. Context-free grammar is any grammar that is of the form , where
is a finite set of nonterminal characters,
is a finite set of terminal characters,
is the relation from
to
, and
is the start symbol.
This is what it looks like in code form:
symbols = ['S','I','T','x'] grammar = { 'S':['I+I','I-I','I*I','I/I'], 'I':['I+I','I-I','I*I','I/I','(I)'] + ['T']*5, 'T':['x'], 'x':[str(float(x)) for x in range(1,10)] }
I used multiple instances of the symbol , which is a generic pointer to a terminal symbol. Using just one makes the nonterminal characters so much more likely to be chosen, that the expressions tend to never end.
We can think of an individual instance of resulting language to be the translation of the right-most character at any point. Then, an individual can be defined by a fixed-length list of numbers.
attribute_size = max([len(x) for x in grammar.values()]) individual = [random.randint(0,attribute_size-1) for _ in range(30)] print(individual)
Running it results in the following output:
[6, 5, 3, 5, 1, 8, 3, 7, 9, 3, 9, 2, 6, 9, 1, 8, 3, 0, 6, 3, 4, 8, 8, 6, 8, 5, 4, 9, 7, 3]
Now we must define a method of converting this list to a string. I had to check for the case that it was done, by storing the string from the loop before and seeing if it has changed. Also note that we can disregard how large the rule numbers can be because we can use the modulo operation to index over the rules.
def express(individual, symbols, grammar): expression = symbols[0] for branch in individual: expression_old = expression for symbol in symbols: if symbol in expression: i = expression.rindex(symbol) rules = grammar[symbol] replacement = rules[branch % len(rules)] expression = expression[:i] + replacement + expression[i+1:] break if expression == expression_old: break return expression
Then, our individual from earlier becomes:
7.0/1.0-4.0/9.0*2.0
Evolutionary algorithms using DEAP
Evolutionary algorithms describe the class of algorithms that use simulated evolution for the purpose of optimization. Given a fixed length of attributes, individuals can “breed” with each other using crossover points. After many generations, with an optional mutation factor, we arrive at increasingly optimal populations.
DEAP is a library for Distributed Evolutionary Algorithms in Python. I used their onemax example to write the code below to maximize arithmetic operations. In this case, I only made the individual long enough to result in two numbers, so the answer should be .

optimizers.py
import random, math random.seed(64) from deap import base from deap import creator from deap import tools class EvolutionaryCFG: ''' Evolutionary Context-Free Grammar ''' def __init__(self, symbols, grammar, evaluation_function, population_size=20, num_attributes=30): self.symbols = symbols self.grammar = grammar attribute_size = max([len(x) for x in grammar.values()]) self.evaluation_function = evaluation_function creator.create('FitnessMax', base.Fitness, weights=(1.0,)) creator.create('Individual', list, fitness=creator.FitnessMax) self.toolbox = base.Toolbox() self.toolbox.register('attr_int', random.randint, 0, attribute_size-1) self.toolbox.register('individual', tools.initRepeat, creator.Individual, self.toolbox.attr_int, num_attributes) self.toolbox.register('population', tools.initRepeat, list, self.toolbox.individual) self.toolbox.register('evaluate', self.evaluate) self.toolbox.register('mate', tools.cxOnePoint) self.toolbox.register('mutate', tools.mutUniformInt, low=0, up=255, indpb=0.05) self.toolbox.register('select', tools.selTournament, tournsize=3) self.population = self.toolbox.population(n=population_size) self.mate_prob = 0.5 self.mutant_prob = 0.2 # Prevents re-evaluation of the same individuals (assumes determinism) self.score_cache = {} def express(self, individual): expression = 'S' for branch in individual: expression_old = expression for symbol in self.symbols: if symbol in expression: i = expression.rindex(symbol) rules = self.grammar[symbol] replacement = rules[branch % len(rules)] expression = expression[:i] + replacement + expression[i+1:] break if expression == expression_old: break return expression def evaluate(self, individual): expression = self.express(individual) if expression not in self.score_cache.keys(): if any([symbol in expression for symbol in self.symbols]): score = float('-inf') else: try: score = self.evaluation_function(expression) except: score = float('-inf') self.score_cache[expression] = score else: score = self.score_cache[expression] return score, def evolve(self, generations): print('Start of evolution') # Evaluate the entire population print('\tGeneration 0') fitnesses = list(map(self.toolbox.evaluate, self.population)) for ind, fit in zip(self.population, fitnesses): ind.fitness.values = fit print('\t\tEvaluated %i individuals' % len(self.population)) # Gather all the fitnesses in one list and print the stats fits = [ind.fitness.values[0] for ind in self.population if ind.fitness.values[0] != float('-inf')] length = len(self.population) mean = sum(fits) / length sum2 = sum(x*x for x in fits) std = abs(sum2 / length - mean**2)**0.5 print('\t\t\tMin %s' % min(fits)) print('\t\t\tMax %s' % max(fits)) print('\t\t\tAvg %s' % mean) print('\t\t\tStd %s' % std) for generation in xrange(1,generations+1): print('\tGeneration %i' % generation) # Select the next generation individuals offspring = self.toolbox.select(self.population, len(self.population)) # Clone the selected individuals offspring = list(map(self.toolbox.clone, offspring)) # Apply crossover and mutation on the offspring for child1, child2 in zip(offspring[::2], offspring[1::2]): # cross two individuals with probability self.mate_prob if random.random() < self.mate_prob: self.toolbox.mate(child1, child2) # fitness values of the children must be recalculated later del child1.fitness.values del child2.fitness.values # mutate an individual with probability self.mutant_prob for mutant in offspring: if random.random() < self.mutant_prob: self.toolbox.mutate(mutant) del mutant.fitness.values # Evaluate the individuals with an invalid fitness invalid_ind = [ind for ind in offspring if not ind.fitness.valid] fitnesses = map(self.toolbox.evaluate, invalid_ind) for ind, fit in zip(invalid_ind, fitnesses): ind.fitness.values = fit print('\t\tEvaluated %i individuals' % len(invalid_ind)) # The population is entirely replaced by the offspring self.population[:] = offspring # Gather all the fitnesses in one list and print the stats fits = [ind.fitness.values[0] for ind in self.population if ind.fitness.values[0] != float('-inf')] length = len(self.population) mean = sum(fits) / length sum2 = sum(x*x for x in fits) std = abs(sum2 / length - mean**2)**0.5 print('\t\t\tMin %s' % min(fits)) print('\t\t\tMax %s' % max(fits)) print('\t\t\tAvg %s' % mean) print('\t\t\tStd %s' % std) print('Done!') best_ind = tools.selBest(self.population, 1)[0] print('Best individual is\n\t%s\n\t%s\n\nResult:%s' % (best_ind, self.express(best_ind), best_ind.fitness.values))
main.py
import math from optimizers import EvolutionaryCFG def evaluate(expression): return math.fabs(eval(expression)) def main(): symbols = ['x','y','z','T','I','S'] grammar = { 'x':[str(float(x)) for x in xrange(1,10)], 'y':[str(float(x)) for x in xrange(1,10)], 'z':[str(float(x)) for x in xrange(1,10)], 'T':['x','y','z'], 'I':['I+I','I-I','I*I','I/I','(I)'] + ['T']*5, 'S':['I+I','I-I','I*I','I/I'] } optimizer = EvolutionaryCFG(symbols, grammar, evaluate, population_size=300, num_attributes=7) optimizer.evolve(generations=4) if __name__ == '__main__': main()
Results
Running main.py has the following result:
Start of evolution Generation 0 Evaluated 300 individuals Min 0.0 Max 54.0 Avg 2.02304232804 Std 6.04227568275 Generation 1 Evaluated 167 individuals Min 0.0 Max 63.0 Avg 5.24390740741 Std 9.29000294167 Generation 2 Evaluated 172 individuals Min 0.0 Max 72.0 Avg 12.2283809524 Std 14.7002320802 Generation 3 Evaluated 188 individuals Min 0.571428571429 Max 81.0 Avg 22.6207936508 Std 19.0033049194 Generation 4 Evaluated 180 individuals Min 0.0 Max 81.0 Avg 37.4266666667 Std 19.6186464591 Done! Best individual is [6, 5, 4, 8, 6, 1, 8] 9.0*9.0 Result:(81.0,)