1
1
using Ju
2
2
using .. MultiArmBandits
3
3
using Statistics
4
- using LaTeXStrings
5
4
using Plots
6
5
# Select the GR backend for Plots before any figure is drawn.
gr()
7
6
8
- figpath (f) = " docs/src/assets/figures/figure_ $f .png "
7
+
9
8
10
9
function collect_best_actions ()
11
10
isbest = Vector {Bool} ()
26
25
27
26
# #############################
28
27
29
# NOTE: fig_2_1 is kept commented out.
# function fig_2_1()
#     env = MultiArmBanditsEnv()
#     f = render(env)
#     savefig(f, "figure_2_1.png")
#     f
# end
35
34
36
35
37
36
"""
    fig_2_2()

Build a two-row plot comparing ϵ-greedy learners for ϵ ∈ (0.1, 0.01, 0.0).

For every ϵ, `bandit_testbed` is run 2000 times with a freshly constructed
learner. The first element of each result is averaged across runs and drawn in
subplot 1; the second element is averaged and drawn in subplot 2.
The figure is written to `figure_2_2.png` in the current working directory and
the plot object is returned.
"""
function fig_2_2()
    nruns = 2000  # independent testbed runs averaged per curve
    # A new learner is created for every run so that no state is shared.
    learner(ϵ) = QLearner(
        TabularQ(1, 10), EpsilonGreedySelector(ϵ), 0., cached_inverse_decay()
    )
    p = plot(layout=(2, 1), dpi=200)
    for ϵ in [0.1, 0.01, 0.0]
        stats = [bandit_testbed(learner(ϵ)) for _ in 1:nruns]
        # NOTE(review): presumably x[1] is the reward curve and x[2] the
        # best-action ratio — confirm against `bandit_testbed`.
        plot!(p, mean(x[1] for x in stats), subplot=1, legend=:bottomright,
              label="epsilon=$ϵ")
        plot!(p, mean(x[2] for x in stats), subplot=2, legend=:bottomright,
              label="epsilon=$ϵ")
    end
    savefig(p, "figure_2_2.png")
    return p
end
48
47
49
48
"""
    fig_2_3()

Plot two averaged curves comparing a greedy learner built with
`TabularQ(1, 10, 5.)` against an ϵ-greedy learner (ϵ = 0.1) built with
`TabularQ(1, 10)`.

Each curve is the mean, over 2000 runs, of the second element returned by
`bandit_testbed`; a fresh learner is constructed for every run. The figure is
written to `figure_2_3.png` in the current working directory and the plot
object is returned.
"""
function fig_2_3()
    nruns = 2000  # independent testbed runs averaged per curve
    # NOTE(review): the third TabularQ argument looks like the initial value
    # estimate (the label says Q_1=5) — confirm against TabularQ.
    learner1() = QLearner(TabularQ(1, 10, 5.), EpsilonGreedySelector(0.0), 0., 0.1)
    learner2() = QLearner(TabularQ(1, 10), EpsilonGreedySelector(0.1), 0., 0.1)
    p = plot(legend=:bottomright, dpi=200)
    plot!(p, mean(bandit_testbed(learner1())[2] for _ in 1:nruns),
          label="Q_1=5, epsilon=0.")
    plot!(p, mean(bandit_testbed(learner2())[2] for _ in 1:nruns),
          label="Q_1=0, epsilon=0.1")
    savefig(p, "figure_2_3.png")
    return p
end
58
57
59
58
"""
    fig_2_4()

Plot two averaged curves comparing a learner that selects actions with
`UpperConfidenceBound` against an ϵ-greedy learner (ϵ = 0.1).

Each curve is the mean, over 2000 runs, of the first element returned by
`bandit_testbed`; a fresh learner is constructed for every run. The figure is
written to `figure_2_4.png` in the current working directory and the plot
object is returned.
"""
function fig_2_4()
    nruns = 2000  # independent testbed runs averaged per curve
    # NOTE(review): the legend label says c=2 while UpperConfidenceBound is
    # constructed with 10 — confirm what that argument means.
    learner1() = QLearner(TabularQ(1, 10), UpperConfidenceBound(10), 0., 0.1)
    learner2() = QLearner(TabularQ(1, 10), EpsilonGreedySelector(0.1), 0., 0.1)
    p = plot(legend=:bottomright, dpi=200)
    plot!(p, mean(bandit_testbed(learner1())[1] for _ in 1:nruns),
          label="UpperConfidenceBound, c=2")
    plot!(p, mean(bandit_testbed(learner2())[1] for _ in 1:nruns),
          label="epsilon-greedy, epsilon=0.1")
    savefig(p, "figure_2_4.png")
    return p
end
68
67
69
68
"""
    fig_2_5()

Plot four averaged curves for `GradientBanditLearner` with step sizes 0.1 and
0.4, each once with `sample_avg()` as the baseline argument and once with `0.`.

Each curve is the mean, over 2000 runs, of the second element returned by
`bandit_testbed(learner, 4.0)`; a fresh learner is constructed for every run.
The figure is written to `figure_2_5.png` in the current working directory and
the plot object is returned.
"""
function fig_2_5()
    nruns = 2000  # independent testbed runs averaged per curve
    learner(alpha, baseline) = GradientBanditLearner(
        TabularQ(1, 10), WeightedSample(), alpha, baseline
    )
    truevalue = 4.0  # passed as the second argument to bandit_testbed
    p = plot(legend=:bottomright, dpi=200)
    plot!(p, mean(bandit_testbed(learner(0.1, sample_avg()), truevalue)[2] for _ in 1:nruns),
          label="alpha = 0.1, with baseline")
    plot!(p, mean(bandit_testbed(learner(0.4, sample_avg()), truevalue)[2] for _ in 1:nruns),
          label="alpha = 0.4, with baseline")
    plot!(p, mean(bandit_testbed(learner(0.1, 0.), truevalue)[2] for _ in 1:nruns),
          label="alpha = 0.1, without baseline")
    plot!(p, mean(bandit_testbed(learner(0.4, 0.), truevalue)[2] for _ in 1:nruns),
          label="alpha = 0.4, without baseline")
    savefig(p, "figure_2_5.png")
    return p
end
80
79
@@ -89,6 +88,6 @@ function fig_2_6()
89
88
plot! (p, - 5 : 1 , [mean (mean (bandit_testbed (gradient_learner (2.0 ^ i))[1 ] for _ in 1 : 2000 )) for i in - 5 : 1 ], label= " gradient" )
90
89
plot! (p, - 4 : 2 , [mean (mean (bandit_testbed (UpperConfidenceBound_learner (2.0 ^ i))[1 ] for _ in 1 : 2000 )) for i in - 4 : 2 ], label= " UCB" )
91
90
plot! (p, - 2 : 2 , [mean (mean (bandit_testbed (greedy_with_init_learner (2.0 ^ i))[1 ] for _ in 1 : 2000 )) for i in - 2 : 2 ], label= " greedy with initialization" )
92
- savefig (p, figpath ( " 2_6 " ) )
91
+ savefig (p, " figure_2_6.png " )
93
92
p
94
93
end
0 commit comments