
Commit 880b2f2

minor fix

1 parent 938522d · commit 880b2f2

26 files changed: +108 -98 lines

Dockerfile (+6)

@@ -0,0 +1,6 @@
+FROM julia:1.1
+
+ADD . /RLIntro
+WORKDIR /RLIntro
+RUN ["julia", "-e", "using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate(); pkg\"precompile\""]
+CMD ["julia"]
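For readers unfamiliar with the quoted one-liner in the RUN step, it is roughly equivalent to the following Julia session executed from the repository root (a sketch; `pkg"precompile"` is the Pkg string macro that runs the `precompile` REPL command):

```julia
# Rough equivalent of the Docker RUN step, executed from the repository root.
using Pkg

Pkg.develop(PackageSpec(path = pwd()))  # track the local checkout as a development dependency
Pkg.instantiate()                       # install the dependencies listed in Project.toml
pkg"precompile"                         # precompile the dependencies ahead of time
```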

Project.toml (+1 -2)

@@ -7,15 +7,14 @@ version = "0.1.0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
 Ju = "449ae9ca-b987-11e8-3919-0764a06dfe61"
-LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
-StatPlots = "60ddc479-9b66-56df-82fc-76a74619b69c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"

 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

README.md (-2)

@@ -33,8 +33,6 @@ julia> @show [f for f in names(RLIntro) if startswith(string(f), "fig")]; # lis
 julia> fig_2_2() # reproduce figure_2_2
 ```

-**Notice** that for some figures you may need to install *pdflatex*.
-
 ## Develop

 If you would like to make some improvements, I'd suggest the following workflow:

src/RLIntro.jl (+10)

@@ -1,5 +1,7 @@
 module RLIntro

+export plot_all
+
 include("environments/environments.jl")

 using Reexport
@@ -17,4 +19,12 @@ include("chapter11/chapter11.jl")
 include("chapter12/chapter12.jl")
 include("chapter13/chapter13.jl")

+function plot_all(fig_dir=".")
+    for f in names(RLIntro)
+        if startswith(string(f), "fig")
+            @eval $f()
+        end
+    end
+end
+
 end # module
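A minimal usage sketch of the newly exported `plot_all` (an assumption about intended use, not part of the diff itself); with the `figpath` helpers removed in the changes below, each figure function now writes its PNG into the current working directory:

```julia
# Minimal usage sketch (assumes RLIntro is dev'ed and instantiated in the active environment).
using RLIntro

# Calls every exported `fig_*` function in turn; each one saves its figure,
# e.g. "figure_2_2.png", into the current working directory.
plot_all()
```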

src/chapter02/ten_armed_testbed.jl (+22 -23)

@@ -1,11 +1,10 @@
 using Ju
 using ..MultiArmBandits
 using Statistics
-using LaTeXStrings
 using Plots
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 function collect_best_actions()
     isbest = Vector{Bool}()
@@ -26,55 +25,55 @@ end

 ##############################

-function fig_2_1()
-    env = MultiArmBanditsEnv()
-    f = render(env)
-    savefig(f, figpath("2_1"))
-    f
-end
+# function fig_2_1()
+#     env = MultiArmBanditsEnv()
+#     f = render(env)
+#     savefig(f, "figure_2_1.png")
+#     f
+# end


 function fig_2_2()
     learner(ϵ) = QLearner(TabularQ(1, 10), EpsilonGreedySelector(ϵ), 0., cached_inverse_decay())
     p = plot(layout=(2, 1), dpi=200)
     for ϵ in [0.1, 0.01, 0.0]
         stats = [bandit_testbed(learner(ϵ)) for _ in 1:2000]
-        plot!(p, mean(x[1] for x in stats), subplot=1, legend=:bottomright, label=latexstring("\\epsilon="))
-        plot!(p, mean(x[2] for x in stats), subplot=2, legend=:bottomright, label=latexstring("\\epsilon="))
+        plot!(p, mean(x[1] for x in stats), subplot=1, legend=:bottomright, label="epsilon=")
+        plot!(p, mean(x[2] for x in stats), subplot=2, legend=:bottomright, label="epsilon=")
     end
-    savefig(p, figpath("2_2"))
+    savefig(p, "figure_2_2.png")
     p
 end

 function fig_2_3()
     learner1() = QLearner(TabularQ(1, 10, 5.), EpsilonGreedySelector(0.0), 0., 0.1)
     learner2() = QLearner(TabularQ(1, 10), EpsilonGreedySelector(0.1), 0., 0.1)
     p = plot(legend=:bottomright, dpi=200)
-    plot!(p, mean(bandit_testbed(learner1())[2] for _ in 1:2000), label=latexstring("Q_1=5, \\epsilon=0."))
-    plot!(p, mean(bandit_testbed(learner2())[2] for _ in 1:2000), label=latexstring("Q_1=0, \\epsilon=0.1"))
-    savefig(p, figpath("2_3"))
+    plot!(p, mean(bandit_testbed(learner1())[2] for _ in 1:2000), label="Q_1=5, epsilon=0.")
+    plot!(p, mean(bandit_testbed(learner2())[2] for _ in 1:2000), label="Q_1=0, epsilon=0.1")
+    savefig(p, "figure_2_3.png")
     p
 end

 function fig_2_4()
     learner1() = QLearner(TabularQ(1, 10), UpperConfidenceBound(10), 0., 0.1)
     learner2() = QLearner(TabularQ(1, 10), EpsilonGreedySelector(0.1), 0., 0.1)
     p = plot(legend=:bottomright, dpi=200)
-    plot!(p, mean(bandit_testbed(learner1())[1] for _ in 1:2000), label=latexstring("UpperConfidenceBound, c=2"))
-    plot!(p, mean(bandit_testbed(learner2())[1] for _ in 1:2000), label=latexstring("\\epsilon-greedy, \\epsilon=0.1"))
-    savefig(p, figpath("2_4"))
+    plot!(p, mean(bandit_testbed(learner1())[1] for _ in 1:2000), label="UpperConfidenceBound, c=2")
+    plot!(p, mean(bandit_testbed(learner2())[1] for _ in 1:2000), label="epsilon-greedy, epsilon=0.1")
+    savefig(p, "figure_2_4.png")
     p
 end

 function fig_2_5()
     learner(alpha, baseline) = GradientBanditLearner(TabularQ(1, 10), WeightedSample(), alpha, baseline)
     truevalue = 4.0
     p = plot(legend=:bottomright, dpi=200)
-    plot!(p, mean(bandit_testbed(learner(0.1, sample_avg()), truevalue)[2] for _ in 1:2000), label=latexstring("\\alpha = 0.1, with baseline"))
-    plot!(p, mean(bandit_testbed(learner(0.4, sample_avg()), truevalue)[2] for _ in 1:2000), label=latexstring("\\alpha = 0.4, with baseline"))
-    plot!(p, mean(bandit_testbed(learner(0.1, 0.), truevalue)[2] for _ in 1:2000), label=latexstring("\\alpha = 0.1, without baseline"))
-    plot!(p, mean(bandit_testbed(learner(0.4, 0.), truevalue)[2] for _ in 1:2000), label=latexstring("\\alpha = 0.4, without baseline"))
-    savefig(p, figpath("2_5"))
+    plot!(p, mean(bandit_testbed(learner(0.1, sample_avg()), truevalue)[2] for _ in 1:2000), label="alpha = 0.1, with baseline")
+    plot!(p, mean(bandit_testbed(learner(0.4, sample_avg()), truevalue)[2] for _ in 1:2000), label="alpha = 0.4, with baseline")
+    plot!(p, mean(bandit_testbed(learner(0.1, 0.), truevalue)[2] for _ in 1:2000), label="alpha = 0.1, without baseline")
+    plot!(p, mean(bandit_testbed(learner(0.4, 0.), truevalue)[2] for _ in 1:2000), label="alpha = 0.4, without baseline")
+    savefig(p, "figure_2_5.png")
     p
 end

@@ -89,6 +88,6 @@ function fig_2_6()
     plot!(p, -5:1, [mean(mean(bandit_testbed(gradient_learner(2.0^i))[1] for _ in 1:2000)) for i in -5:1], label="gradient")
     plot!(p, -4:2, [mean(mean(bandit_testbed(UpperConfidenceBound_learner(2.0^i))[1] for _ in 1:2000)) for i in -4:2], label="UCB")
     plot!(p, -2:2, [mean(mean(bandit_testbed(greedy_with_init_learner(2.0^i))[1] for _ in 1:2000)) for i in -2:2], label="greedy with initialization")
-    savefig(p, figpath("2_6"))
+    savefig(p, "figure_2_6.png")
     p
 end
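A side note on the label changes above: with LaTeXStrings dropped, the legend entries become plain strings such as "epsilon=", so the value of ϵ no longer appears. A hypothetical variant (not part of this commit) keeps the value visible by interpolating it into a plain Unicode label, which the GR backend renders directly; the sketch assumes the same file context (Ju, ..MultiArmBandits and Plots already loaded):

```julia
# Hypothetical variant of fig_2_2 (not part of this commit): interpolate ϵ into a
# plain string label so the legend still shows the value without LaTeXStrings.
function fig_2_2_plain_labels()
    learner(ϵ) = QLearner(TabularQ(1, 10), EpsilonGreedySelector(ϵ), 0., cached_inverse_decay())
    p = plot(layout=(2, 1), dpi=200)
    for ϵ in [0.1, 0.01, 0.0]
        stats = [bandit_testbed(learner(ϵ)) for _ in 1:2000]
        plot!(p, mean(x[1] for x in stats), subplot=1, legend=:bottomright, label="ϵ = $ϵ")
        plot!(p, mean(x[2] for x in stats), subplot=2, legend=:bottomright, label="ϵ = $ϵ")
    end
    savefig(p, "figure_2_2.png")
    p
end
```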

src/chapter03/grid_world.jl (+3 -3)

@@ -2,7 +2,7 @@ using Ju
 using Plots
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 const GridWorldLinearIndices = LinearIndices((5,5))
 const GridWorldCartesianIndices = CartesianIndices((5,5))
@@ -34,14 +34,14 @@ function fig_3_2()
     V, π = TabularV(25), RandomPolicy(fill(0.25, 25, 4))
     policy_evaluation!(V, π, GridWorldEnvModel)
     p = heatmap(1:5, 1:5, reshape(V.table, 5,5), yflip=true)
-    savefig(p, figpath("3_2"))
+    savefig(p, "figure_3_2.png")
     p
 end

 function fig_3_5()
     V, π = TabularV(25), DeterministicPolicy(rand(1:4, 25), 4)
     policy_iteration!(V, π, GridWorldEnvModel)
     p = heatmap(1:5, 1:5, reshape(V.table, 5,5), yflip=true)
-    savefig(p, figpath("3_5"))
+    savefig(p, "figure_3_5.png")
     p
 end

src/chapter04/car_rental.jl (+3 -3)

@@ -3,7 +3,7 @@ using Distributions
 using Plots
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 const PoissonUpperBound = 10
 const MaxCars= 20
@@ -52,8 +52,8 @@ function fig_4_2(max_iter=100)
     V, π = TabularV((1+MaxCars)^2), DeterministicPolicy(zeros(Int,21^2), length(Actions))
     policy_iteration!(V, π, CarRentalEnvModel; γ=0.9, max_iter=max_iter)
     p1 = heatmap(0:MaxCars, 0:MaxCars, reshape([decode_action(x) for x in π.table], 1+MaxCars,1+MaxCars))
-    savefig(p1, figpath("4_2_policy"))
+    savefig(p1, "figure_4_2_policy.png")
     p2 = heatmap(0:MaxCars, 0:MaxCars, reshape(V.table, 1+MaxCars,1+MaxCars))
-    savefig(p2, figpath("4_2_value"))
+    savefig(p2, "figure_4_2_value.png")
     p1, p2
 end

src/chapter04/gambler_problem.jl (+2 -2)

@@ -3,7 +3,7 @@ using Distributions
 using Plots
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 const pₕ = 0.4
 const WinCapital = 100
@@ -30,6 +30,6 @@ function fig_4_3(max_iter=typemax(Int))
     V = TabularV(1+WinCapital)
     value_iteration!(V, GamblerProblemEnvModel; γ=1.0, max_iter=max_iter)
     p = plot(V.table[2:end-1])
-    savefig(p, figpath("4_3"))
+    savefig(p, "figure_4_3.png")
     p
 end

src/chapter04/grid_world.jl (+2 -2)

@@ -2,7 +2,7 @@ using Ju
 using Plots
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 const GridWorldLinearIndices = LinearIndices((4,4))
 const GridWorldCartesianIndices = CartesianIndices((4,4))
@@ -29,6 +29,6 @@ function fig_4_1()
     V, π = TabularV(16), RandomPolicy(fill(0.25, 16, 4))
     policy_evaluation!(V, π, GridWorldEnvModel; γ=1.0)
     p = heatmap(1:4, 1:4, reshape(V.table, 4,4), yflip=true)
-    savefig(p, figpath("4_1"))
+    savefig(p, "figure_4_1.png")
     p
 end

src/chapter05/blackjack.jl (+8 -8)

@@ -4,7 +4,7 @@ using StatsBase:mean
 using ..BlackJack
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 const Indices = LinearIndices(size(observationspace(BlackJackEnv)))

@@ -29,8 +29,8 @@ function fig_5_1(n=10000)
                                for dealer_card in 2:11, player_sum in 11:21]
     p1 = heatmap(usable_ace_values)
     p2 = heatmap(no_usable_ace_values)
-    savefig(p1, figpath("5_1_usable_ace_n_$n"))
-    savefig(p2, figpath("5_1_no_usable_ace_n_$n"))
+    savefig(p1, "figure_5_1_usable_ace_n_$n.png")
+    savefig(p2, "figure_5_1_no_usable_ace_n_$n.png")
     p1, p2
 end

@@ -56,10 +56,10 @@ function fig_5_2(n=1000000)
     p2 = heatmap(no_usable_ace_values)
     p3 = heatmap(usable_ace_policy)
     p4 = heatmap(no_usable_ace_policy)
-    savefig(p1, figpath("5_2_usable_ace_n_$n"))
-    savefig(p2, figpath("5_2_no_usable_ace_n_$n"))
-    savefig(p3, figpath("5_2_usable_ace_policy_n_$n"))
-    savefig(p4, figpath("5_2_no_usable_ace_policy_n_$n"))
+    savefig(p1, "figure_5_2_usable_ace_n_$n.png")
+    savefig(p2, "figure_5_2_no_usable_ace_n_$n.png")
+    savefig(p3, "figure_5_2_usable_ace_policy_n_$n.png")
+    savefig(p4, "figure_5_2_no_usable_ace_policy_n_$n.png")
     p1, p2, p3, p4
 end

@@ -93,6 +93,6 @@ function fig_5_3(n=10000)
     end
     p = plot(mean((run() .- (-0.27726)).^2 for _ in 1:100), label="Weighted Importance Sampling")
     p = plot!(p, mean((run(:OrdinaryImportanceSampling) .- (-0.27726)).^2 for _ in 1:100), xscale=:log10, label="Ordinary Importance Sampling")
-    savefig(p, figpath("5_3"))
+    savefig(p, "figure_5_3.png")
     p
 end

src/chapter05/leftright.jl (+2 -2)

@@ -4,7 +4,7 @@ using ..LeftRight
 using Plots
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 function fig_5_4()
     function value_collect()
@@ -31,6 +31,6 @@ function fig_5_4()
         train!(LeftRightEnv(), agent; callbacks = callbacks)
         plot!(p, callbacks[2](), xscale = :log10)
     end
-    savefig(p, figpath("5_4"))
+    savefig(p, "figure_5_4.png")
     p
 end

src/chapter06/cliff_walking.jl (+3 -3)

@@ -4,7 +4,7 @@ using ..CliffWalking
 using Plots
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 function rewards_of_each_episode()
     rewards = []
@@ -61,7 +61,7 @@ function fig_6_3_a()
     p = plot(legend=:bottomright, dpi=200)
     plot!(p, mean(rewards(gen_env_Qagent()...) for _ in 1:100), label="QLearning")
     plot!(p, mean(rewards(gen_env_SARSAagent()...) for _ in 1:100), label="SARSA")
-    savefig(p, figpath("6_3_a"))
+    savefig(p, "figure_6_3_a.png")
     p
 end

@@ -82,6 +82,6 @@ function fig_6_3_b()
     plot!(p, A, [mean(avg_reward_per_episode(1000, gen_env_Qagent(α)...) for _ in 1:10) for α in A], label="Asymptotic interim Q")
     plot!(p, A, [mean(avg_reward_per_episode(1000, gen_env_SARSAagent(α)...) for _ in 1:10) for α in A], label="Asymptotic SARSA")
     plot!(p, A, [mean(avg_reward_per_episode(1000, gen_env_ExpectedSARSAagent(α)...) for _ in 1:10) for α in A], label="Asymptotic ExpectedSARSA")
-    savefig(p, figpath("6_3_b"))
+    savefig(p, "figure_6_3_b.png")
     p
 end

src/chapter06/maximization_bias.jl (+2 -2)

@@ -4,7 +4,7 @@ using StatsBase:mean
 using Plots
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 function count_left_actions_from_A()
     counts_per_episode = []
@@ -54,6 +54,6 @@ function fig_6_5()
     p = plot(legend=:topright, dpi=200)
     plot!(p, mean(run_once(gen_env_DQagent()...) for _ in 1:10000), label="Double-Q")
     plot!(p, mean(run_once(gen_env_Qagent()...) for _ in 1:10000), label="Q")
-    savefig(p, figpath("6_5"))
+    savefig(p, "figure_6_5.png")
     p
 end

src/chapter06/randomwalk.jl (+6 -7)

@@ -1,11 +1,10 @@
 using Ju
 using Statistics
-using LaTeXStrings
 using ..RandomWalk
 using Plots
 gr()

-figpath(f) = "docs/src/assets/figures/figure_$f.png"
+

 const true_values = [i/6 for i in 1:5]

@@ -54,7 +53,7 @@ function fig_6_2_a()
         train!(env, agent; callbacks = (stop_at_episode(i),))
         plot!(p, agent.learner.approximator.table[2:end - 1])
     end
-    savefig(p, figpath("6_2_a"))
+    savefig(p, "figure_6_2_a.png")
     p
 end

@@ -63,15 +62,15 @@ function fig_6_2_b()
     for α in [0.05, 0.1, 0.15]
         callbacks = (stop_at_episode(100), record_rms())
         train!(gen_env_TDagent(α)...;callbacks = callbacks)
-        plot!(p, callbacks[2](), label = latexstring("TD \\alpha="))
+        plot!(p, callbacks[2](), label ="TD alpha=")
     end

     for α in [0.01, 0.02, 0.03, 0.04]
         callbacks = (stop_at_episode(100), record_rms())
         train!(gen_env_MCagent(α)...;callbacks = callbacks)
-        plot!(p, callbacks[2](), label = latexstring("MC \\alpha="))
+        plot!(p, callbacks[2](), label ="MC alpha=")
     end
-    savefig(p, figpath("6_2_b"))
+    savefig(p, "figure_6_2_b.png")
     p
 end

@@ -93,6 +92,6 @@ function fig_6_2_c()
     end
     plot!(mean(avg_rms), color=:red, label="MC")

-    savefig(p, figpath("6_2_c"))
+    savefig(p, "figure_6_2_c.png")
     p
 end
