# grid_mdp.py
# author: ad71
import tkinter as tk
import tkinter.messagebox
from tkinter import ttk

from functools import partial

import sys
import os.path
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

from mdp import *
import utils
import numpy as np
import time

import matplotlib
import matplotlib.animation as animation
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.ticker import MaxNLocator
from matplotlib.figure import Figure
from matplotlib import style
from matplotlib import pyplot as plt
matplotlib.use('TkAgg')
style.use('ggplot')

# Module-level matplotlib figure and its single axes; SolveMDP embeds `fig`
# in a Tk canvas and redraws the heatmap on `sub`.
fig = Figure(figsize=(20, 15))
sub = fig.add_subplot(111)
plt.rcParams['axes.grid'] = False

# Sentinel values: a grid cell equal to WALL_VALUE is a wall; TERM_VALUE is the
# value the 'Terminal' radiobutton stores in its tk variable when selected.
WALL_VALUE = -99999.0
TERM_VALUE = -999999.0

# Colour palette used by the ttk button styles and label backgrounds.
black = '#000'
white = '#fff'
gray2 = '#222'
gray9 = '#999'
grayd = '#ddd'
grayef = '#efefef'
pblue = '#000040'
green8 = '#008080'
green4 = '#004040'


def extents(f):
	''' returns [low, high] bounds for f, padded by half a step at each end (heatmap axis markers) '''

	# the step is taken from the first two samples only
	half_step = (f[1] - f[0]) / 2
	first, last = f[0], f[-1]
	return [first - half_step, last + half_step]

def display(gridmdp, _height, _width):
	''' displays matrix

	Opens a 'Values' window with one label per cell, formatted to three
	decimals. Rows are drawn bottom-up: gridmdp[_height - 1] becomes the top
	row of the window.

	gridmdp -- indexable rows of numbers
	_height, _width -- grid dimensions; the loops run at least once even when
	a dimension is below 1 (the row index still uses the raw _height)
	'''

	dialog = tk.Toplevel()
	dialog.wm_title('Values')

	container = tk.Frame(dialog)
	container.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

	rows, cols = max(1, _height), max(1, _width)
	font = ('Helvetica', 12)
	for i in range(rows):
		# flip vertically so the last stored row is shown first
		source_row = gridmdp[_height - i - 1]
		for j in range(cols):
			label = ttk.Label(container, text=f'{source_row[j]:.3f}', font=font)
			label.grid(row=i + 1, column=j + 1, padx=3, pady=3)

	# Deliberately no dialog.mainloop(): the Toplevel is serviced by the
	# application's already-running event loop. A nested mainloop() here does
	# not return until the whole application quits, which leaves any caller
	# that wants to do more work after showing the matrix stuck.

def display_best_policy(_best_policy, _height, _width):
	''' shows the policy grid, one bold label per cell, in a 'Best Policy' window '''

	window = tk.Toplevel()
	window.wm_title('Best Policy')

	body = tk.Frame(window)
	body.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

	n_rows, n_cols = max(1, _height), max(1, _width)
	bold_font = ('Helvetica', 12, 'bold')
	for row in range(n_rows):
		policy_row = _best_policy[row]
		for col in range(n_cols):
			cell = ttk.Label(body, text=policy_row[col], font=bold_font)
			cell.grid(row=row + 1, column=col + 1, padx=3, pady=3)

	# NOTE(review): mainloop() on a Toplevel starts a nested event loop;
	# presumably it only returns when the application quits - confirm
	window.mainloop()

def initialize_dialogbox(_width, _height, gridmdp, terminals, buttons):
	''' opens the 'Initialize' window, whose Apply and Reset buttons act on every cell at once '''

	window = tk.Toplevel()
	window.wm_title('Initialize')

	body = tk.Frame(window)
	body.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
	body.grid_rowconfigure(0, weight=1)
	body.grid_columnconfigure(0, weight=1)

	# tk variables behind the two radiobuttons and the reward entry, all starting cleared
	wall = tk.IntVar(value=0)
	term = tk.IntVar(value=0)
	reward = tk.DoubleVar(value=0.0)

	small_font = ('Helvetica', 10)
	full_row = {'column': 0, 'columnspan': 3}

	label = ttk.Label(body, text='Initialize', font=('Helvetica', 12), anchor=tk.N)
	label.grid(row=0, sticky='new', pady=15, padx=5, **full_row)
	label_reward = ttk.Label(body, text='Reward', font=small_font, anchor=tk.N)
	label_reward.grid(row=1, sticky='new', pady=1, padx=5, **full_row)
	entry_reward = ttk.Entry(body, font=small_font, justify=tk.CENTER, exportselection=0, textvariable=reward)
	entry_reward.grid(row=2, sticky='new', pady=5, padx=50, **full_row)

	rbtn_term = ttk.Radiobutton(body, text='Terminal', variable=term, value=TERM_VALUE)
	rbtn_term.grid(row=3, sticky='nsew', padx=160, pady=5, **full_row)
	rbtn_wall = ttk.Radiobutton(body, text='Wall', variable=wall, value=WALL_VALUE)
	rbtn_wall.grid(row=4, sticky='nsew', padx=172, pady=5, **full_row)

	# pre-set widget states from the grid's current contents
	initialize_widget_disability_checks(_width, _height, gridmdp, terminals, label_reward, entry_reward, rbtn_wall, rbtn_term)

	# the two handlers take the radiobuttons in opposite order
	shared = (_width, _height, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward)
	actions = (
		('Apply', partial(initialize_update_table, *shared, rbtn_term, rbtn_wall)),
		('Reset', partial(initialize_reset_all, *shared, rbtn_wall, rbtn_term)),
		('Ok', window.destroy),
	)
	for column, (caption, handler) in enumerate(actions):
		action_btn = ttk.Button(body, text=caption, command=handler)
		action_btn.grid(row=5, column=column, sticky='nsew', pady=5, padx=5)

	window.geometry('400x200')
	window.mainloop()

def update_table(i, j, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward, rbtn_term, rbtn_wall):
	''' applies the dialog's current choices to cell (i, j); handler for the 'apply' button '''

	# wall chosen: mark the cell and lock the reward / terminal controls
	if wall.get() == WALL_VALUE:
		cell_button = buttons[i][j]
		cell_button.configure(style='wall.TButton')
		cell_button.config(text='Wall')
		label_reward.config(foreground='#999')
		entry_reward.config(state=tk.DISABLED)
		rbtn_term.state(['!focus', '!selected'])
		rbtn_term.config(state=tk.DISABLED)
		gridmdp[i][j] = WALL_VALUE
		return

	# a zero reward leaves the stored value and the button untouched
	amount = reward.get()
	if amount != 0.0:
		gridmdp[i][j] = amount
		buttons[i][j].configure(style='reward.TButton')
		buttons[i][j].config(text=f'R = {amount}')

	if term.get() != TERM_VALUE:
		return

	# terminal chosen: record it once and lock the wall option
	if (i, j) not in terminals:
		terminals.append((i, j))
	rbtn_wall.state(['!focus', '!selected'])
	rbtn_wall.config(state=tk.DISABLED)

	# button colour follows the sign of the stored reward
	stored = gridmdp[i][j]
	if stored < 0:
		term_style = '-term.TButton'
	elif stored > 0:
		term_style = '+term.TButton'
	elif stored == 0.0:
		term_style = '=term.TButton'
	else:
		term_style = None

	if term_style is not None:
		buttons[i][j].configure(style=term_style)

def initialize_update_table(_width, _height, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward, rbtn_term, rbtn_wall):
	''' applies update_table to every cell, row by row '''

	row_indices = range(max(1, _height))
	col_indices = range(max(1, _width))
	for i in row_indices:
		for j in col_indices:
			update_table(i, j, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward, rbtn_term, rbtn_wall)

def reset_all(_height, i, j, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward, rbtn_wall, rbtn_term):
	''' clears cell (i, j) and returns the dialog widgets to their default state '''

	# clear the dialog variables
	reward.set(0.0)
	for flag in (term, wall):
		flag.set(0)

	# restore the cell's value and its default coordinate caption
	gridmdp[i][j] = 0.0
	cell_button = buttons[i][j]
	cell_button.configure(style='TButton')
	cell_button.config(text=f'({_height - i - 1}, {j})')

	try:
		terminals.remove((i, j))
	except ValueError:
		# the cell was not a terminal
		pass

	# re-enable every control and deselect both radiobuttons
	label_reward.config(foreground='#000')
	entry_reward.config(state=tk.NORMAL)
	for rbtn in (rbtn_term, rbtn_wall):
		rbtn.config(state=tk.NORMAL)
	for rbtn in (rbtn_wall, rbtn_term):
		rbtn.state(['!focus', '!selected'])

def initialize_reset_all(_width, _height, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward, rbtn_wall, rbtn_term):
	''' applies reset_all to every cell, row by row '''

	row_indices = range(max(1, _height))
	col_indices = range(max(1, _width))
	for i in row_indices:
		for j in col_indices:
			reset_all(_height, i, j, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward, rbtn_wall, rbtn_term)

def external_reset(_width, _height, gridmdp, terminals, buttons):
	''' reset from edit menu

	Zeroes every cell of gridmdp, restores each button's default style and
	coordinate caption, and empties the terminals list.

	gridmdp, terminals and buttons are modified in place so that every other
	holder of the same objects sees the reset.
	'''

	# clear in place: rebinding the parameter (terminals = []) would leave the
	# caller's list, and therefore the old terminals, untouched
	del terminals[:]

	for i in range(max(1, _height)):
		for j in range(max(1, _width)):
			gridmdp[i][j] = 0.0
			buttons[i][j].configure(style='TButton')
			buttons[i][j].config(text=f'({_height - i - 1}, {j})')

def widget_disability_checks(i, j, gridmdp, terminals, label_reward, entry_reward, rbtn_wall, rbtn_term):
	''' puts the dialog widgets into the state required by cell (i, j) '''

	# a wall cell: grey out the reward controls, disable 'Terminal', preselect 'Wall'
	cell_is_wall = gridmdp[i][j] == WALL_VALUE
	if cell_is_wall:
		label_reward.config(foreground='#999')
		entry_reward.config(state=tk.DISABLED)
		rbtn_term.config(state=tk.DISABLED)
		rbtn_wall.state(['!focus', 'selected'])
		rbtn_term.state(['!focus', '!selected'])

	# a terminal cell: the 'Wall' option is disabled and deselected
	cell_is_terminal = (i, j) in terminals
	if cell_is_terminal:
		rbtn_wall.config(state=tk.DISABLED)
		rbtn_wall.state(['!focus', '!selected'])

def flatten_list(_list):
	''' returns a flattened list

	_list -- an iterable of iterables (typically a list of lists)

	Returns a new list holding the items of every inner sequence, in order.
	Only one level of nesting is removed.
	'''

	# single pass; sum(_list, []) would rebuild the accumulator for every row
	return [item for row in _list for item in row]

def initialize_widget_disability_checks(_width, _height, gridmdp, terminals, label_reward, entry_reward, rbtn_wall, rbtn_term):
	''' checks for required state of widgets when cells are initialized

	Unlike the per-cell check, the widgets are only restricted when the
	condition holds for every cell of the grid:

	- all cells are walls: grey out the reward controls, disable 'Terminal'
	  and preselect 'Wall'
	- all cells are terminals: disable 'Wall' and preselect 'Terminal'
	'''

	cells = [(i, j) for i in range(max(1, _height)) for j in range(max(1, _width))]
	terminal_cells = set(terminals)

	all_walls = all(gridmdp[i][j] == WALL_VALUE for i, j in cells)
	all_terminals = all(cell in terminal_cells for cell in cells)

	if all_walls:
		label_reward.config(foreground='#999')
		entry_reward.config(state=tk.DISABLED)
		rbtn_term.config(state=tk.DISABLED)
		rbtn_wall.state(['!focus', 'selected'])
		rbtn_term.state(['!focus', '!selected'])

	if all_terminals:
		rbtn_wall.config(state=tk.DISABLED)
		rbtn_wall.state(['!focus', '!selected'])
		rbtn_term.state(['!focus', 'selected'])

def dialogbox(i, j, gridmdp, terminals, buttons, _height):
	''' opens the configuration window for the cell stored at (i, j) '''

	# cells are labelled with their row counted from the bottom of the grid
	shown_row = _height - i - 1
	current = gridmdp[i][j]

	window = tk.Toplevel()
	window.wm_title(f'{shown_row}, {j}')

	body = tk.Frame(window)
	body.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
	body.grid_rowconfigure(0, weight=1)
	body.grid_columnconfigure(0, weight=1)

	# tk variables seeded from the cell's current state
	wall = tk.IntVar(value=current)
	term = tk.IntVar(value=TERM_VALUE if (i, j) in terminals else 0.0)
	reward = tk.DoubleVar(value=current if current != WALL_VALUE else 0.0)

	small_font = ('Helvetica', 10)
	full_row = {'column': 0, 'columnspan': 3}

	label = ttk.Label(body, text=f'Configure cell {shown_row}, {j}', font=('Helvetica', 12), anchor=tk.N)
	label.grid(row=0, sticky='new', pady=15, padx=5, **full_row)
	label_reward = ttk.Label(body, text='Reward', font=small_font, anchor=tk.N)
	label_reward.grid(row=1, sticky='new', pady=1, padx=5, **full_row)
	entry_reward = ttk.Entry(body, font=small_font, justify=tk.CENTER, exportselection=0, textvariable=reward)
	entry_reward.grid(row=2, sticky='new', pady=5, padx=50, **full_row)

	rbtn_term = ttk.Radiobutton(body, text='Terminal', variable=term, value=TERM_VALUE)
	rbtn_term.grid(row=3, sticky='nsew', padx=160, pady=5, **full_row)
	rbtn_wall = ttk.Radiobutton(body, text='Wall', variable=wall, value=WALL_VALUE)
	rbtn_wall.grid(row=4, sticky='nsew', padx=172, pady=5, **full_row)

	# pre-set widget states from the cell's current contents
	widget_disability_checks(i, j, gridmdp, terminals, label_reward, entry_reward, rbtn_wall, rbtn_term)

	# the two handlers take the radiobuttons in opposite order
	shared = (i, j, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward)
	actions = (
		('Apply', partial(update_table, *shared, rbtn_term, rbtn_wall)),
		('Reset', partial(reset_all, _height, *shared, rbtn_wall, rbtn_term)),
		('Ok', window.destroy),
	)
	for column, (caption, handler) in enumerate(actions):
		action_btn = ttk.Button(body, text=caption, command=handler)
		action_btn.grid(row=5, column=column, sticky='nsew', pady=5, padx=5)

	window.geometry('400x200')
	window.mainloop()


class MDPapp(tk.Tk):
	''' root window: owns the menu bar, the shared grid dimensions and the page frames '''

	def __init__(self, *args, **kwargs):
		''' builds the menu bar, instantiates every page and shows HomePage '''

		tk.Tk.__init__(self, *args, **kwargs)
		tk.Tk.wm_title(self, 'Grid MDP')

		# grid dimensions shared between pages; HomePage binds its entries to these
		self.shared_data = {
			'height': tk.IntVar(),
			'width': tk.IntVar()
		}
		self.shared_data['height'].set(1)
		self.shared_data['width'].set(1)

		# all pages are stacked in the same grid cell of this container
		self.container = tk.Frame(self)
		self.container.pack(side='top', fill='both', expand=True)
		self.container.grid_rowconfigure(0, weight=1)
		self.container.grid_columnconfigure(0, weight=1)

		# page class -> page instance
		self.frames = {}

		self.menu_bar = tk.Menu(self.container)
		self.file_menu = tk.Menu(self.menu_bar, tearoff=0)
		self.file_menu.add_command(label='Exit', command=self.exit)
		self.menu_bar.add_cascade(label='File', menu=self.file_menu)

		# 'Edit' and 'Build' start disabled; BuildMDP.create_buttons enables them
		self.edit_menu = tk.Menu(self.menu_bar, tearoff=1)
		self.edit_menu.add_command(label='Reset', command=self.master_reset)
		self.edit_menu.add_command(label='Initialize', command=self.initialize)
		self.edit_menu.add_separator()
		self.edit_menu.add_command(label='View matrix', command=self.view_matrix)
		self.edit_menu.add_command(label='View terminals', command=self.view_terminals)
		self.menu_bar.add_cascade(label='Edit', menu=self.edit_menu)
		self.menu_bar.entryconfig('Edit', state=tk.DISABLED)

		self.build_menu = tk.Menu(self.menu_bar, tearoff=1)
		self.build_menu.add_command(label='Build and Run', command=self.build)
		self.menu_bar.add_cascade(label='Build', menu=self.build_menu)
		self.menu_bar.entryconfig('Build', state=tk.DISABLED)
		tk.Tk.config(self, menu=self.menu_bar)

		for F in (HomePage, BuildMDP, SolveMDP):
			frame = F(self.container, self)
			self.frames[F] = frame
			frame.grid(row=0, column=0, sticky='nsew')

		self.show_frame(HomePage)

	def placeholder_function(self):
		''' placeholder function '''

		print('Not supported yet!')

	def exit(self):
		''' asks for confirmation and, if given, terminates the program '''

		if tkinter.messagebox.askokcancel('Exit?', 'All changes will be lost'):
			# sys.exit() instead of the builtin quit(): quit() is injected by the
			# site module and is not available in every runtime (python -S, frozen apps)
			sys.exit()

	def new(self):
		''' function to create new GridMDP

		Resets the current grid (after confirmation), drops the BuildMDP
		page's grid, terminals and buttons, and returns to HomePage.
		'''

		self.master_reset()
		build_page = self.get_page(BuildMDP)
		build_page.gridmdp = None
		build_page.terminals = None
		build_page.buttons = None
		self.show_frame(HomePage)

	def get_page(self, page_class):
		''' returns pages from stored frames '''

		return self.frames[page_class]

	def view_matrix(self):
		''' prints current matrix to console and opens it in a window via display() '''

		build_page = self.get_page(BuildMDP)
		_height = self.shared_data['height'].get()
		_width = self.shared_data['width'].get()
		print(build_page.gridmdp)
		display(build_page.gridmdp, _height, _width)

	def view_terminals(self):
		''' prints current terminals to console '''

		build_page = self.get_page(BuildMDP)
		print('Terminals', build_page.terminals)

	def initialize(self):
		''' calls initialize from BuildMDP '''

		build_page = self.get_page(BuildMDP)
		build_page.initialize()

	def master_reset(self):
		''' calls master_reset from BuildMDP '''

		build_page = self.get_page(BuildMDP)
		build_page.master_reset()

	def build(self):
		''' runs specified mdp solving algorithm

		Creates a fresh SolveMDP page, raises it and hands it the grid and
		terminals currently held by the BuildMDP page.
		'''

		# a new SolveMDP replaces the stored one so each run starts from a clean state
		frame = SolveMDP(self.container, self)
		self.frames[SolveMDP] = frame
		frame.grid(row=0, column=0, sticky='nsew')
		self.show_frame(SolveMDP)
		build_page = self.get_page(BuildMDP)
		gridmdp = build_page.gridmdp
		terminals = build_page.terminals
		solve_page = self.get_page(SolveMDP)
		_height = self.shared_data['height'].get()
		_width = self.shared_data['width'].get()
		solve_page.create_graph(gridmdp, terminals, _height, _width)

	def show_frame(self, controller, cb=False):
		''' shows specified frame and optionally runs create_buttons

		controller -- page class used as key into self.frames
		cb -- when true, BuildMDP.create_buttons() is run before raising the frame
		'''

		if cb:
			build_page = self.get_page(BuildMDP)
			build_page.create_buttons()
		frame = self.frames[controller]
		frame.tkraise()


class HomePage(tk.Frame):
	''' landing page: title, colour legend, grid-dimension entries and the build button '''

	def __init__(self, parent, controller):
		''' HomePage constructor '''

		tk.Frame.__init__(self, parent)
		self.controller = controller

		# four rows stacked top to bottom: title, legend, 'Dimensions' caption, size entries
		stacked = []
		for _ in range(4):
			row = tk.Frame(self)
			row.pack(side=tk.TOP)
			stacked.append(row)
		title_row, legend_row, caption_row, size_row = stacked

		# ttk button styles, one per kind of cell
		palette = ttk.Style()
		palette.theme_use('clam')
		style_table = (
			('TButton', {'background': grayd, 'padding': 0}),
			('wall.TButton', {'background': gray2, 'foreground': white}),
			('reward.TButton', {'background': gray9}),
			('+term.TButton', {'background': green8}),
			('-term.TButton', {'background': pblue, 'foreground': white}),
			('=term.TButton', {'background': green4}),
		)
		for style_name, options in style_table:
			palette.configure(style_name, **options)

		title = ttk.Label(title_row, text='GridMDP builder', font=('Helvetica', 18, 'bold'), background=grayef)
		title.pack(pady=75, padx=50, side=tk.TOP)

		# legend: one command-less button per style, laid out left to right
		legend = (
			('Empty cells', 'TButton'),
			('Walls', 'wall.TButton'),
			('Rewards', 'reward.TButton'),
			('Positive terminals', '+term.TButton'),
			('Neutral terminals', '=term.TButton'),
			('Negative terminals', '-term.TButton'),
		)
		for caption, style_name in legend:
			swatch = ttk.Button(legend_row, text=caption, width=20)
			swatch.pack(pady=0, padx=0, side=tk.LEFT, ipady=10)
			swatch.configure(style=style_name)

		dims_caption = ttk.Label(caption_row, text='Dimensions', font=('Verdana', 14), background=grayef)
		dims_caption.pack(pady=15, padx=10, side=tk.TOP)

		# height X width entries, bound to the controller's shared variables
		entry_font = ('Verdana', 10)
		shared = self.controller.shared_data
		height_entry = tk.Entry(size_row, textvariable=shared['height'], font=entry_font, width=3, justify=tk.CENTER)
		height_entry.pack(pady=10, padx=10, side=tk.LEFT)
		times_label = ttk.Label(size_row, text='X', font=entry_font, background=grayef)
		times_label.pack(pady=10, padx=4, side=tk.LEFT)
		width_entry = tk.Entry(size_row, textvariable=shared['width'], font=entry_font, width=3, justify=tk.CENTER)
		width_entry.pack(pady=10, padx=10, side=tk.LEFT)

		# switches to the BuildMDP page and has it create the cell buttons
		build_btn = ttk.Button(self, text='Build a GridMDP', command=lambda: controller.show_frame(BuildMDP, cb=True))
		build_btn.pack(pady=10, padx=10, side=tk.TOP, ipadx=20, ipady=10)
		build_btn.configure(style='reward.TButton')


class BuildMDP(tk.Frame):
	''' page holding the grid of clickable cells used to define the MDP '''

	def __init__(self, parent, controller):
		''' BuildMDP constructor '''

		tk.Frame.__init__(self, parent)
		self.grid_rowconfigure(0, weight=1)
		self.grid_columnconfigure(0, weight=1)
		# inner frame that will hold the cell buttons
		self.frame = tk.Frame(self)
		self.frame.pack()
		self.controller = controller

	def create_buttons(self):
		''' creates the grid state and one interactive button per cell '''

		shared = self.controller.shared_data
		_height = shared['height'].get()
		_width = shared['width'].get()
		n_rows, n_cols = max(1, _height), max(1, _width)

		# the grid can now be edited and solved
		for menu_label in ('Edit', 'Build'):
			self.controller.menu_bar.entryconfig(menu_label, state=tk.NORMAL)

		self.gridmdp = [[0.0]*n_cols for _ in range(n_rows)]
		self.buttons = [[None]*n_cols for _ in range(n_rows)]
		self.terminals = []

		# ttk button styles, one per kind of cell
		palette = ttk.Style()
		palette.theme_use('clam')
		style_table = (
			('TButton', {'background': grayd, 'padding': 0}),
			('wall.TButton', {'background': gray2, 'foreground': white}),
			('reward.TButton', {'background': gray9}),
			('+term.TButton', {'background': green8}),
			('-term.TButton', {'background': pblue, 'foreground': white}),
			('=term.TButton', {'background': green4}),
		)
		for style_name, options in style_table:
			palette.configure(style_name, **options)

		# button size shrinks as the grid grows
		cell_width = int(196/n_cols)
		cell_ipady = int(336/n_rows) - 12

		for i in range(n_rows):
			for j in range(n_cols):
				# each button opens the dialog for its own cell
				open_dialog = partial(dialogbox, i, j, self.gridmdp, self.terminals, self.buttons, _height)
				cell = ttk.Button(self.frame, text=f'({_height - i - 1}, {j})', width=cell_width, command=open_dialog)
				self.buttons[i][j] = cell
				cell.grid(row=i, column=j, ipady=cell_ipady)

	def initialize(self):
		''' opens the initialize dialog for the whole grid '''

		shared = self.controller.shared_data
		_height = shared['height'].get()
		_width = shared['width'].get()
		initialize_dialogbox(_width, _height, self.gridmdp, self.terminals, self.buttons)

	def master_reset(self):
		''' asks for confirmation, then runs external_reset on the whole grid '''

		shared = self.controller.shared_data
		_height = shared['height'].get()
		_width = shared['width'].get()
		confirmed = tkinter.messagebox.askokcancel('Reset', 'Are you sure you want to reset all cells?')
		if confirmed:
			external_reset(_width, _height, self.gridmdp, self.terminals, self.buttons)


class SolveMDP(tk.Frame):
	''' page that runs value iteration on the built grid and animates the utilities as a heatmap '''

	def __init__(self, parent, controller):
		''' SolveMDP constructor '''

		tk.Frame.__init__(self, parent)
		self.grid_rowconfigure(0, weight=1)
		self.grid_columnconfigure(0, weight=1)
		self.frame = tk.Frame(self)
		self.frame.pack()
		self.controller = controller
		# set once the result dialogs have been shown, so they are shown only once
		self.terminated = False
		# number of animation frames (= value-iteration sweeps) run so far
		self.iterations = 0
		# convergence tolerance used in animate_graph
		self.epsilon = 0.001
		# largest utility change seen in the most recent sweep
		self.delta = 0

	def process_data(self, terminals, _height, _width, gridmdp):
		''' preprocess variables

		terminals -- (row, column) tuples in the builder's row order
		gridmdp -- cell values, with WALL_VALUE marking walls

		Returns (flipped_terminals, grid_to_solve, grid_to_show):
		flipped_terminals holds (column, _height - row - 1) tuples,
		grid_to_solve has None in place of walls, and grid_to_show has 0.0 in
		place of walls and is flipped vertically with np.flipud.
		'''

		flipped_terminals = [(terminal[1], _height - terminal[0] - 1) for terminal in terminals]

		rows, cols = max(1, _height), max(1, _width)
		grid_to_solve = [[0.0]*cols for _ in range(rows)]
		grid_to_show = [[0.0]*cols for _ in range(rows)]

		for i in range(rows):
			for j in range(cols):
				if gridmdp[i][j] == WALL_VALUE:
					grid_to_show[i][j] = 0.0
					grid_to_solve[i][j] = None

				else:
					grid_to_show[i][j] = grid_to_solve[i][j] = gridmdp[i][j]

		return flipped_terminals, grid_to_solve, np.flipud(grid_to_show)

	def create_graph(self, gridmdp, terminals, _height, _width):
		''' creates canvas and initializes value iteration parameters

		Disables the 'Edit' and 'Build' menus, builds the GridMDP from the
		processed grid, embeds the module-level figure in this page and
		starts the animation that drives animate_graph every 50 ms.
		'''

		self._height = _height
		self._width = _width
		self.controller.menu_bar.entryconfig('Edit', state=tk.DISABLED)
		self.controller.menu_bar.entryconfig('Build', state=tk.DISABLED)

		self.terminals, self.gridmdp, self.grid_to_show = self.process_data(terminals, _height, _width, gridmdp)
		self.sequential_decision_environment = GridMDP(self.gridmdp, terminals=self.terminals)

		self.initialize_value_iteration_parameters(self.sequential_decision_environment)

		self.canvas = FigureCanvasTkAgg(fig, self.frame)
		self.canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
		# keep a reference on self so the animation is not garbage collected
		self.anim = animation.FuncAnimation(fig, self.animate_graph, interval=50)
		# draw() rather than show(): FigureCanvasTkAgg.show() was deprecated and
		# later removed from matplotlib, while draw() exists in old and new versions
		self.canvas.draw()

	def _bellman_sweep(self):
		''' runs one value-iteration sweep over all states

		Updates self.U1 in place, stores the largest absolute utility change
		of this sweep in self.delta and returns the utilities as they were
		before the sweep.
		'''

		U = self.U1.copy()
		mdp = self.sequential_decision_environment

		# delta must start from zero on every sweep; carrying the maximum over
		# from earlier sweeps would keep the convergence test from ever passing
		delta = 0
		for s in mdp.states:
			self.U1[s] = self.R(s) + self.gamma * max([sum([p * U[s1] for (p, s1) in self.T(s, a)]) for a in mdp.actions(s)])
			delta = max(delta, abs(self.U1[s] - U[s]))

		self.delta = delta
		return U

	def animate_graph(self, i):
		''' performs value iteration and animates graph

		i -- frame number supplied by FuncAnimation (unused)

		Each call redraws the heatmap from self.grid_to_show, runs one sweep
		of value iteration and stores the pre-sweep utilities for the next
		frame. Once the sweep has converged, or after more than 60 frames,
		the value matrix and the best policy are displayed a single time.
		'''

		# cmaps to use: bone_r, Oranges, inferno, BrBG, copper
		self.iterations += 1
		x_interval = max(2, len(self.gridmdp[0]))
		y_interval = max(2, len(self.gridmdp))
		x = np.linspace(0, len(self.gridmdp[0]) - 1, x_interval)
		y = np.linspace(0, len(self.gridmdp) - 1, y_interval)

		sub.clear()
		sub.imshow(self.grid_to_show, cmap='BrBG', aspect='auto', interpolation='none', extent=extents(x) + extents(y), origin='lower')
		fig.tight_layout()

		U = self._bellman_sweep()

		# states are indexed as (column, row); cells without a state stay at 0.0
		self.grid_to_show = [[0.0]*max(1, self._width) for _ in range(max(1, self._height))]
		for k, v in U.items():
			self.grid_to_show[k[1]][k[0]] = v

		converged = self.delta < self.epsilon * (1 - self.gamma) / self.gamma
		# the terminated guard covers both stop conditions so the dialogs open only once
		if (converged or self.iterations > 60) and not self.terminated:
			self.terminated = True
			display(self.grid_to_show, self._height, self._width)

			pi = best_policy(self.sequential_decision_environment, value_iteration(self.sequential_decision_environment, .01))
			display_best_policy(self.sequential_decision_environment.to_arrows(pi), self._height, self._width)

		ax = fig.gca()
		ax.xaxis.set_major_locator(MaxNLocator(integer=True))
		ax.yaxis.set_major_locator(MaxNLocator(integer=True))

	def initialize_value_iteration_parameters(self, mdp):
		''' initializes value_iteration parameters: zero utilities plus the mdp's R, T and gamma '''

		self.U1 = {s: 0 for s in mdp.states}
		self.R, self.T, self.gamma = mdp.R, mdp.T, mdp.gamma

	def value_iteration_metastep(self, mdp, iterations=20):
		''' runs value_iteration

		Performs a fixed number of sweeps starting from all-zero utilities
		and returns a list with the utilities as they were before each sweep.
		'''

		U_over_time = []
		U1 = {s: 0 for s in mdp.states}
		R, T, gamma = mdp.R, mdp.T, mdp.gamma

		for _ in range(iterations):
			U = U1.copy()

			for s in mdp.states:
				U1[s] = R(s) + gamma * max([sum([p * U[s1] for (p, s1) in T(s, a)]) for a in mdp.actions(s)])

			U_over_time.append(U)
		return U_over_time


# Launch the GUI only when this file is executed as a script.
if __name__ == '__main__':
	app = MDPapp()
	# initial window size, width x height in pixels
	app.geometry('1280x720')
	app.mainloop()