dp_gamblers_problem.py

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import matplotlib
import matplotlib.pyplot as plt

from introrl.dp_funcs.dp_value_iter import dp_value_iteration
from introrl.mdp_data.gamblers_problem import get_gambler

gambler = get_gambler(prob_heads=0.4)

policy, state_value = dp_value_iteration( gambler, allow_multi_actions=True,
                                          do_summ_print=True,fmt_V='%.4f',
                                          max_iter=1000, err_delta=0.00001, 
                                          gamma=1.0)
print( gambler.get_info() )

# --------------- plot logic -------------------
min_state_list = []
min_action_list = []

state_list = []
action_list = []
for i_state in range(1,100):
    aL = policy.get_list_of_all_action_desc_prob( i_state, incl_zero_prob=False)
    min_state_list.append( i_state - 0.5 )
    min_action_list.append( min([a for a,p in aL]) )

    min_state_list.append( i_state + 0.5 )
    min_action_list.append( min([a for a,p in aL]) )

    for a,p in aL:
        action_list.append( a )
        state_list.append( i_state )
    
fig, ax = plt.subplots()
plt.plot( min_state_list, min_action_list, '-' )
plt.title( 'Figure 4.3, Gamblers Optimum Policy' )
plt.xlabel('Capital')
plt.ylabel('Final Policy (stake)')

fig.savefig( 'figure_4_3_gamblers_policy.png' )

# --------------------------------
    
fig, ax = plt.subplots()
plt.plot( state_list, action_list, 's' )
plt.title( 'Figure 4.3, Gamblers Full Optimum Policy' )
plt.xlabel('Capital')
plt.ylabel('Final Policy (stake)')

fig.savefig( 'figure_4_3_gamblers_full_policy.png' )


plt.show()