# plot_blackjack_es.py
#
# Plot the blackjack state values V(s) stored in the 'blackjack_es' pickle
# as two 3D surfaces (usable ace / no usable ace) and save each as a PNG.

import sys
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np

from introrl.black_box_sims.blackjack_sim import BlackJackSimulation
from introrl.policy import Policy
from introrl.agent_supt.action_value_run_ave_coll import ActionValueRunAveColl

# Build the simulation and an action-value collection bound to it, then
# populate the collection from the pickle file named 'blackjack_es'.
# NOTE(review): presumably this file was written by an earlier Monte Carlo ES
# training run -- confirm against the script that produces it.
BJ = BlackJackSimulation()
av = ActionValueRunAveColl(BJ)
av.init_from_pickle_file(fname="blackjack_es")

# Derive the state-value object used by the plots below and print its summary.
# NOTE(review): the Q(s,a) -> V(s) reduction lives in build_Vs_from_Qsa,
# which is not visible from this file.
sv = av.build_Vs_from_Qsa(BJ)
sv.summ_print(showRunningAve=False)


# --------------------------------------------------------------
def _plot_state_value_surface(state_values, usable_ace, title, png_name):
    """Draw V(s) as a 3D surface over (player sum, dealer card) and save it.

    Args:
        state_values: object exposing ``get_ave(s_hash)``; it is queried with
            ``(player_sum, usable_ace, dealer_showing)`` tuples.
        usable_ace: bool placed in the middle slot of every state tuple.
        title: text used as the axes title.
        png_name: file name passed to ``fig.savefig``.

    Returns:
        tuple: ``(fig, surf)`` -- the new figure and the surface returned by
        ``ax.plot_surface`` (handy for e.g. ``fig.colorbar(surf, ...)``).
    """
    fig = plt.figure()
    # Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and is
    # rejected by later releases; add_subplot is the supported way to get
    # 3D axes and also works on older versions.
    ax = fig.add_subplot(111, projection='3d')

    # Make data.
    player_sums = list(range(12, 22))
    dealer_cards = list(range(1, 11))

    # One row per dealer card, one column per player sum, so Z lines up with
    # the grids produced by np.meshgrid(player_sums, dealer_cards).
    # NOTE(review): assumes get_ave returns a number for every state in the
    # grid -- confirm it cannot return None for states missing from the pickle.
    Z = np.array([
        [state_values.get_ave((p_sum, usable_ace, d_card))
         for p_sum in player_sums]
        for d_card in dealer_cards
    ])
    X, Y = np.meshgrid(player_sums, dealer_cards)

    # Plot the surface.
    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                           linewidth=0, antialiased=False)

    # Customize the z axis.
    ax.set_zlim(-1.01, 1.01)
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

    # The y limits are given high-to-low, which reverses the dealer axis.
    ax.set_ylim(10, 1)
    ax.set_xlim(12, 21)
    ax.set_title(title)

    ax.set_xlabel('Player Sum')
    ax.set_ylabel('Dealer Showing')
    ax.set_zlabel('V(s)')

    ax.view_init(elev=30.0, azim=-145.0)

    fig.savefig(png_name)
    return fig, surf


# --------------------------------------------------------------
# States with a usable ace.
fig, surf = _plot_state_value_surface(
    sv, True, 'Usable Ace Monte Carlo ES', "fig_5_2_w_ace_mc_es.png")

# --------------------------------------------------------------
# States without a usable ace.
fig, surf = _plot_state_value_surface(
    sv, False, 'No Usable Ace  Monte Carlo ES', "fig_5_2_noace_mc_es.png")


# Add a color bar which maps values to colors.
#fig.colorbar(surf, shrink=0.5, aspect=5)

plt.show()