plot_blackjack_10000_eval.py

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np

from introrl.black_box_sims.blackjack_sim import BlackJackSimulation
from introrl.policy import Policy
from introrl.agent_supt.state_value_run_ave_coll import StateValueRunAveColl

# Build the blackjack simulation and the state-value collection bound to it,
# then populate the collection from the pickle named 'mc_blackjack_10000_eval'.
# NOTE(review): presumably this file was written by an earlier 10,000-episode
# evaluation run -- confirm which script produces it.
BJ = BlackJackSimulation()
sv = StateValueRunAveColl( BJ )
sv.read_pickle_file( fname='mc_blackjack_10000_eval')

#sv.summ_print( showRunningAve=False )


# --------------------------------------------------------------
def _plot_state_values(usable_ace, title, png_name):
    """Plot the averaged state values as a 3D surface and save it as an image.

    The surface is sampled on the grid of x = 12..21 (labelled 'Player Sum')
    by y = 1..10 (labelled 'Dealer Showing').  Each height is
    ``sv.get_ave((x, usable_ace, y))``, read from the module-level ``sv``.

    Args:
        usable_ace: Boolean placed in the middle slot of every state hash.
        title: Text used as the axes title.
        png_name: File name passed to ``fig.savefig``.

    Returns:
        Tuple ``(fig, ax, surf)`` with the figure, its 3D axes and the
        surface returned by ``ax.plot_surface``.
    """
    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments (deprecated in
    # Matplotlib 3.4, removed in 3.6); add_subplot is the supported way to
    # request a 3D projection and also works on older releases.
    ax = fig.add_subplot(111, projection='3d')

    # Make data.
    player_sums = list(range(12, 22))
    dealer_cards = list(range(1, 11))
    # One row per y value and one column per x value, matching the layout
    # produced by np.meshgrid with its default indexing.
    values = [
        [sv.get_ave((x, usable_ace, y)) for x in player_sums]
        for y in dealer_cards
    ]

    X, Y = np.meshgrid(player_sums, dealer_cards)
    Z = np.array(values)

    # Plot the surface.
    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                           linewidth=0, antialiased=False)

    # Customize the z axis.
    ax.set_zlim(-1.01, 1.01)
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    # Limits given high-to-low so the y axis runs from 10 down to 1.
    ax.set_ylim(10, 1)
    ax.set_xlim(12, 21)
    ax.set_title(title)

    ax.set_xlabel('Player Sum')
    ax.set_ylabel('Dealer Showing')
    ax.set_zlabel('V(s)')

    ax.view_init(elev=30.0, azim=-145.0)

    fig.savefig(png_name)
    return fig, ax, surf


# Figure for states with a usable ace.
fig, ax, surf = _plot_state_values(
    True, 'Usable Ace after 10,000 Episodes', "fig_5_1_w_ace_10000.png")

# --------------------------------------------------------------
# Figure for states without a usable ace.
fig, ax, surf = _plot_state_values(
    False, 'No Usable Ace after 10,000 Episodes', "fig_5_1_noace_10000.png")


# Add a color bar which maps values to colors.
#fig.colorbar(surf, shrink=0.5, aspect=5)

# Hand every figure created above to the active Matplotlib backend for display.
plt.show()