From ef347108d882d82b5dc69935eca1ad7d0b49b087 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 17 Nov 2017 17:09:13 +1100 Subject: [PATCH 01/13] Adds env.total_reward as total reward since last reset! Plus: * update abstract type syntax for 0.6+ * remove a non-universal linux workaround --- src/OpenAIGym.jl | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index 9e8353a..a037aa0 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -19,7 +19,7 @@ const _py_envs = Dict{String,Any}() # -------------------------------------------------------------- -abstract AbstractGymEnv <: AbstractEnvironment +abstract type AbstractGymEnv <: AbstractEnvironment end "A simple wrapper around the OpenAI gym environments to add to the Reinforce framework" type GymEnv <: AbstractGymEnv @@ -27,6 +27,7 @@ type GymEnv <: AbstractGymEnv pyenv # the python "env" object state reward::Float64 + total_reward::Float64 actions::AbstractSet done::Bool info::Dict @@ -37,6 +38,7 @@ GymEnv(name) = gym(name) function Reinforce.reset!(env::GymEnv) env.state = env.pyenv[:reset]() env.reward = 0.0 + env.total_reward = 0.0 env.actions = actions(env, nothing) env.done = false end @@ -47,6 +49,7 @@ type UniverseEnv <: AbstractGymEnv pyenv # the python "env" object state reward + total_reward actions::AbstractSet done info::Dict @@ -57,6 +60,7 @@ UniverseEnv(name) = gym(name) function Reinforce.reset!(env::UniverseEnv) env.state = env.pyenv[:reset]() env.reward = [0.0] + env.total_reward = 0.0 env.actions = actions(env, nothing) env.done = [false] end @@ -92,7 +96,8 @@ end # -------------------------------------------------------------- -render(env::AbstractGymEnv, args...) = env.pyenv[:render]() +render(env::AbstractGymEnv, args...; kwargs...) = + pycall(env.pyenv[:render], PyAny; kwargs...) 
# -------------------------------------------------------------- @@ -148,20 +153,26 @@ function Reinforce.step!(env::GymEnv, s, a) # info("Going to take action: $a") pyact = pyaction(a) s′, r, env.done, env.info = env.pyenv[:step](pyact) - env.reward, env.state = r, s′ + env.reward = r + env.total_reward += r + env.state = s′ + r, s′ end function Reinforce.step!(env::UniverseEnv, s, a) info("Going to take action: $a") pyact = Any[pyaction(a)] s′, r, env.done, env.info = env.pyenv[:step](pyact) - env.reward, env.state = r, s′ + env.reward = r + env.total_reward += r[1] # assuming it's an array based on `reset!` + env.state = s′ + r, s′ end +Reinforce.finished(env::GymEnv) = env.done Reinforce.finished(env::GymEnv, s′) = env.done Reinforce.finished(env::UniverseEnv, s′) = all(env.done) - # -------------------------------------------------------------- @@ -175,12 +186,12 @@ end function __init__() - @static if is_linux() - # due to a ssl library bug, I have to first load the ssl lib here - condadir = Pkg.dir("Conda","deps","usr","lib") - Libdl.dlopen(joinpath(condadir, "libssl.so")) - Libdl.dlopen(joinpath(condadir, "python2.7", "lib-dynload", "_ssl.so")) - end + # @static if is_linux() + # # due to a ssl library bug, I have to first load the ssl lib here + # condadir = Pkg.dir("Conda","deps","usr","lib") + # Libdl.dlopen(joinpath(condadir, "libssl.so")) + # Libdl.dlopen(joinpath(condadir, "python2.7", "lib-dynload", "_ssl.so")) + # end global const pygym = pyimport("gym") end From 670eff8d164206763f30896f85dc9dbc0e78861a Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 00:04:22 +1000 Subject: [PATCH 02/13] Remove Universe AFAICT Universe is not maintained by OpenAI anymore, and should probably have it's own file/project anyway to reduce clutter --- src/OpenAIGym.jl | 65 ------------------------------------------------ 1 file changed, 65 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index a037aa0..560746d 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -43,49 +43,12 @@ function Reinforce.reset!(env::GymEnv) env.done = false end -"A simple wrapper around the OpenAI gym environments to add to the Reinforce framework" -type UniverseEnv <: AbstractGymEnv - name::String - pyenv # the python "env" object - state - reward - total_reward - actions::AbstractSet - done - info::Dict - UniverseEnv(name,pyenv) = new(name,pyenv) -end -UniverseEnv(name) = gym(name) - -function Reinforce.reset!(env::UniverseEnv) - env.state = env.pyenv[:reset]() - env.reward = [0.0] - env.total_reward = 0.0 - env.actions = actions(env, nothing) - env.done = [false] -end - function gym(name::AbstractString) env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0") @pyimport gym_soccer get!(_py_envs, name) do GymEnv(name, pygym[:make](name)) end - elseif split(name, ".")[1] in ("flashgames", "wob") - @pyimport universe - @pyimport universe.wrappers as wrappers - if !isdefined(OpenAIGym, :vnc_event) - global const vnc_event = PyCall.pywrap(PyCall.pyimport("universe.spaces.vnc_event")) - end - get!(_py_envs, name) do - pyenv = wrappers.SafeActionSpace(pygym[:make](name)) - pyenv[:configure](remotes=1) # automatically creates a local docker container - # pyenv[:configure](remotes="vnc://localhost:5900+15900") - o = UniverseEnv(name, pyenv) - # finalizer(o, o.pyenv[:close]()) - sleep(2) - o - end else GymEnv(name, pygym[:make](name)) end @@ -121,14 +84,6 @@ function actionset(A::PyObject) # # error("Unsupported shape for IntervalSet: $(A[:shape])") # [IntervalSet{Float64}(lo[i], hi[i]) for 
i=1:length(lo)] # end - elseif haskey(A, :buttonmasks) - # assumed VNC actions... keys to press, buttons to mask, and screen position - # keyboard = DiscreteSet(A[:keys]) - keyboard = KeyboardActionSet(A[:keys]) - buttons = DiscreteSet(Int[bm for bm in A[:buttonmasks]]) - width,height = A[:screen_shape] - mouse = MouseActionSet(width, height, buttons) - TupleSet(keyboard, mouse) elseif haskey(A, :actions) # Hardcoded TupleSet(DiscreteSet(A[:actions])) @@ -145,8 +100,6 @@ function Reinforce.actions(env::AbstractGymEnv, s′) end pyaction(a::Vector) = Any[pyaction(ai) for ai=a] -pyaction(a::KeyboardAction) = Any[a.key] -pyaction(a::MouseAction) = Any[vnc_event.PointerEvent(a.x, a.y, a.button)] pyaction(a) = a function Reinforce.step!(env::GymEnv, s, a) @@ -159,19 +112,8 @@ function Reinforce.step!(env::GymEnv, s, a) r, s′ end -function Reinforce.step!(env::UniverseEnv, s, a) - info("Going to take action: $a") - pyact = Any[pyaction(a)] - s′, r, env.done, env.info = env.pyenv[:step](pyact) - env.reward = r - env.total_reward += r[1] # assuming it's an array based on `reset!` - env.state = s′ - r, s′ -end - Reinforce.finished(env::GymEnv) = env.done Reinforce.finished(env::GymEnv, s′) = env.done -Reinforce.finished(env::UniverseEnv, s′) = all(env.done) # -------------------------------------------------------------- @@ -186,13 +128,6 @@ end function __init__() - # @static if is_linux() - # # due to a ssl library bug, I have to first load the ssl lib here - # condadir = Pkg.dir("Conda","deps","usr","lib") - # Libdl.dlopen(joinpath(condadir, "libssl.so")) - # Libdl.dlopen(joinpath(condadir, "python2.7", "lib-dynload", "_ssl.so")) - # end - global const pygym = pyimport("gym") end From 3b5ed5da5c67f76e984ee887e7fa7db578b51ab8 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 00:25:44 +1000 Subject: [PATCH 03/13] 1.0 support --- src/OpenAIGym.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index 560746d..36f7d37 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -22,7 +22,7 @@ const _py_envs = Dict{String,Any}() abstract type AbstractGymEnv <: AbstractEnvironment end "A simple wrapper around the OpenAI gym environments to add to the Reinforce framework" -type GymEnv <: AbstractGymEnv +mutable struct GymEnv <: AbstractGymEnv name::String pyenv # the python "env" object state @@ -45,7 +45,7 @@ end function gym(name::AbstractString) env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0") - @pyimport gym_soccer + Base.copy!(gym_soccer, pyimport("gym_soccer")) get!(_py_envs, name) do GymEnv(name, pygym[:make](name)) end @@ -127,8 +127,12 @@ end +global const pygym = PyNULL() +global const pysoccer = PyNULL() + function __init__() - global const pygym = pyimport("gym") + # the copy! 
puts the gym module into `pygym`, handling python ref-counting + Base.copy!(pygym, pyimport("gym")) end end # module From fca8d08c686702f7b7eba5b10676d1887edf03ce Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 17:23:13 +1000 Subject: [PATCH 04/13] Remove state param from `step!(env, ...)` --- src/OpenAIGym.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index 36f7d37..b6ce2a5 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -102,7 +102,7 @@ end pyaction(a::Vector) = Any[pyaction(ai) for ai=a] pyaction(a) = a -function Reinforce.step!(env::GymEnv, s, a) +function Reinforce.step!(env::GymEnv, a) # info("Going to take action: $a") pyact = pyaction(a) s′, r, env.done, env.info = env.pyenv[:step](pyact) From 1b9733bfbdf76108424d3011fb6fd1b770f27955 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 00:55:40 +1000 Subject: [PATCH 05/13] Add tests that double as benchmarks --- test/runtests.jl | 141 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 132 insertions(+), 9 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 143398f..a55aaad 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,13 +1,136 @@ -using OpenAIGym -using Base.Test +using OpenAIGym, Compat.Test +using PyCall -# write your own tests here -@test 1 == 1 +""" +`function time_steps(env::GymEnv{T}, num_eps::Int) where T` -if isinteractive() - env = GymEnv("CartPole-v0") - for i=1:5 - R = run_episode(()->nothing, env, RandomPolicy()) - info("Episode $i finished. Total reward: $R") +run through num_eps eps, recording the time taken for each step and +how many steps were made. Doesn't time the `reset!` or the first step of each +episode (since higher chance that it's slower/faster than the rest, and we want +to compare the average time taken for each step as fairly as possible) +""" +# function time_steps(env::GymEnv{T}, num_eps::Int) where T +function time_steps(env::GymEnv, num_eps::Int) + t = 0.0 + steps = 0 + for i in 1:num_eps + reset!(env) + step!(env, rand(env.actions)) # ignore the first step - it might be slow? 
+ t += (@elapsed steps += epstep(env)) + end + steps, t +end + +""" +Steps through an episode until it's `done` +assumes env has been `reset!` +""" +# function epstep(env::GymEnv{T}) where T +function epstep(env::GymEnv) + steps = 0 + while !env.done + steps += 1 + r, s = step!(env, rand(env.actions)) + end + steps +end + +@testset "Gym Basics" begin + + pong = GymEnv("Pong-v4") + pongnf = GymEnv("PongNoFrameskip-v4") + pacman = GymEnv("MsPacman-v4") + pacmannf = GymEnv("MsPacmanNoFrameskip-v4") + cartpole = GymEnv("CartPole-v0") + bj = GymEnv("Blackjack-v0") + + allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] + eps2trial = Dict(pong=>4, pongnf=>4, pacman=>9, pacmannf=>9, cartpole=>5000, bj=>30000) + atarienvs = [pong, pongnf, pacman, pacmannf] + envs = allenvs + + @testset "envs load" begin + # check they all work - no errors == no worries + println("------------------------------ Check envs load ------------------------------") + for (i, env) in enumerate(envs) + @show env.name env.pyenv a = rand(env.actions)|>OpenAIGym.pyaction PyObject(a)|>pytypeof + time_steps(env, 1) + @test !ispynull(env.pyenv) + println("------------------------------") + end + end + + @testset "julia speed test" begin + println("------------------------------ Begin Julia Speed Check ------------------------------") + for env in envs + num_eps = eps2trial[env] + steps, t = time_steps(env, num_eps) + @show env.name num_eps t steps + println("microsecs/step (lower is better): ", t*1e6/steps) + println("------------------------------") + end + println("------------------------------ End Julia Speed Check ------------------------------\n") + end + + @testset "python speed test" begin + println("------------------------------ Begin Python Speed Check ------------------------------") + py""" + import gym + import numpy as np + + pong = gym.make("Pong-v4") + pongnf = gym.make("PongNoFrameskip-v4") + pacman = gym.make("MsPacman-v4"); + pacmannf = gym.make("MsPacmanNoFrameskip-v4"); + cartpole = gym.make("CartPole-v0") + bj = gym.make("Blackjack-v0") + + allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] + eps2trial = {pong: 4, pongnf: 4, pacman: 9, pacmannf: 9, cartpole: 5000, bj: 30000} + atarienvs = [pong, pongnf, pacman, pacmannf]; + + envs = allenvs + + import time + class Timer(object): + elapsed = 0.0 + def __init__(self, name=None): + self.name = name + + def __enter__(self): + self.tstart = time.time() + + def __exit__(self, type, value, traceback): + Timer.elapsed = time.time() - self.tstart + + def time_steps(env, num_eps): + t = 0.0 + steps = 0 + for i in range(num_eps): + env.reset() + with Timer(): + steps += epstep(env) + t += Timer.elapsed + return steps, t + + def epstep(env): + steps = 0 + while True: + steps += 1 + action = env.action_space.sample() + state, reward, done, info = env.step(action) + if done == True: + break + return steps + + for env in envs: + num_eps = eps2trial[env] + with Timer(): + steps, s = time_steps(env, num_eps) + t = Timer.elapsed + print(f"{env} num_eps: {num_eps} t: {t} steps: {steps} \n microsecs/step (lower is better): {t*1e6/steps}") + print("------------------------------") + """ + println("------------------------------ End Python Speed Check ------------------------------") end end From 2152f2239e1c8d989e14d457a3eea9b081209518 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 19 Sep 2018 01:38:38 +1000 Subject: [PATCH 06/13] Reduce num episodes for slow envs to speed up tests And improve test output formatting --- test/runtests.jl | 10 +++++----- 1 file 
changed, 5 insertions(+), 5 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index a55aaad..c86fa40 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,7 +9,6 @@ how many steps were made. Doesn't time the `reset!` or the first step of each episode (since higher chance that it's slower/faster than the rest, and we want to compare the average time taken for each step as fairly as possible) """ -# function time_steps(env::GymEnv{T}, num_eps::Int) where T function time_steps(env::GymEnv, num_eps::Int) t = 0.0 steps = 0 @@ -25,7 +24,6 @@ end Steps through an episode until it's `done` assumes env has been `reset!` """ -# function epstep(env::GymEnv{T}) where T function epstep(env::GymEnv) steps = 0 while !env.done @@ -45,7 +43,7 @@ end bj = GymEnv("Blackjack-v0") allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] - eps2trial = Dict(pong=>4, pongnf=>4, pacman=>9, pacmannf=>9, cartpole=>5000, bj=>30000) + eps2trial = Dict(pong=>1, pongnf=>1, pacman=>2, pacmannf=>2, cartpole=>100, bj=>30000) atarienvs = [pong, pongnf, pacman, pacmannf] envs = allenvs @@ -53,7 +51,9 @@ end # check they all work - no errors == no worries println("------------------------------ Check envs load ------------------------------") for (i, env) in enumerate(envs) - @show env.name env.pyenv a = rand(env.actions)|>OpenAIGym.pyaction PyObject(a)|>pytypeof + a = rand(env.actions) |> OpenAIGym.pyaction + action_type = a |> PyObject |> pytypeof + println("env.pyenv: $(env.pyenv) action_type: $action_type ex: $a") time_steps(env, 1) @test !ispynull(env.pyenv) println("------------------------------") @@ -65,7 +65,7 @@ end for env in envs num_eps = eps2trial[env] steps, t = time_steps(env, num_eps) - @show env.name num_eps t steps + println("env.pyenv: $(env.pyenv) num_eps: $num_eps t: $t steps: $steps") println("microsecs/step (lower is better): ", t*1e6/steps) println("------------------------------") end From 32b64becdcc6b645450b8422234664007f9a76bf Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 19 Sep 2018 02:09:25 +1000 Subject: [PATCH 07/13] Run envs the same number of eps and steps in each lang --- test/runtests.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index c86fa40..6ada7bd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,7 +14,7 @@ function time_steps(env::GymEnv, num_eps::Int) steps = 0 for i in 1:num_eps reset!(env) - step!(env, rand(env.actions)) # ignore the first step - it might be slow? + # step!(env, rand(env.actions)) # ignore the first step - it might be slow? 
t += (@elapsed steps += epstep(env)) end steps, t @@ -43,7 +43,7 @@ end bj = GymEnv("Blackjack-v0") allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] - eps2trial = Dict(pong=>1, pongnf=>1, pacman=>2, pacmannf=>2, cartpole=>100, bj=>30000) + eps2trial = Dict(pong=>2, pongnf=>1, pacman=>2, pacmannf=>1, cartpole=>400, bj=>30000) atarienvs = [pong, pongnf, pacman, pacmannf] envs = allenvs @@ -86,7 +86,7 @@ end bj = gym.make("Blackjack-v0") allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] - eps2trial = {pong: 4, pongnf: 4, pacman: 9, pacmannf: 9, cartpole: 5000, bj: 30000} + eps2trial = {pong: 2, pongnf: 1, pacman: 2, pacmannf: 1, cartpole: 400, bj: 30000} atarienvs = [pong, pongnf, pacman, pacmannf]; envs = allenvs From 8a9d1b2a0ddf7ec7a5d5448f7edf4476c19705db Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 19 Sep 2018 02:15:00 +1000 Subject: [PATCH 08/13] Remove unused code --- src/OpenAIGym.jl | 10 ---------- test/runtests.jl | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index b6ce2a5..fd09171 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -117,16 +117,6 @@ Reinforce.finished(env::GymEnv, s′) = env.done # -------------------------------------------------------------- - -function test_env(name::String = "CartPole-v0") - env = gym(name) - for sars′ in Episode(env, RandomPolicy()) - render(env) - end -end - - - global const pygym = PyNULL() global const pysoccer = PyNULL() diff --git a/test/runtests.jl b/test/runtests.jl index 6ada7bd..d8b973b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -53,7 +53,7 @@ end for (i, env) in enumerate(envs) a = rand(env.actions) |> OpenAIGym.pyaction action_type = a |> PyObject |> pytypeof - println("env.pyenv: $(env.pyenv) action_type: $action_type ex: $a") + println("env.pyenv: $(env.pyenv) action_type: $action_type (e.g. 
$a)") time_steps(env, 1) @test !ispynull(env.pyenv) println("------------------------------") From a42b8da8cb9f921911a3f94bc4d75ce8a5d9813f Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 01:45:42 +1000 Subject: [PATCH 09/13] Add state type param to env for speed --- src/OpenAIGym.jl | 100 +++++++++++++++++++++++++++++++++++------------ test/runtests.jl | 2 +- 2 files changed, 76 insertions(+), 26 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index fd09171..baf8fc3 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -13,7 +13,8 @@ import Reinforce: export gym, GymEnv, - test_env + test_env, + PyAny const _py_envs = Dict{String,Any}() @@ -22,35 +23,38 @@ const _py_envs = Dict{String,Any}() abstract type AbstractGymEnv <: AbstractEnvironment end "A simple wrapper around the OpenAI gym environments to add to the Reinforce framework" -mutable struct GymEnv <: AbstractGymEnv +mutable struct GymEnv{T} <: AbstractGymEnv name::String - pyenv # the python "env" object - state + pyenv::PyObject # the python "env" object + pystep::PyObject # the python env.step function + pyreset::PyObject # the python env.reset function + pystate::PyObject # the state array object referenced by the PyArray state.o + pystepres::PyObject # used to make stepping the env slightly more efficient + info::PyObject # store it as a PyObject for speed, since often unused + state::T reward::Float64 total_reward::Float64 actions::AbstractSet done::Bool - info::Dict - GymEnv(name,pyenv) = new(name,pyenv) -end -GymEnv(name) = gym(name) - -function Reinforce.reset!(env::GymEnv) - env.state = env.pyenv[:reset]() - env.reward = 0.0 - env.total_reward = 0.0 - env.actions = actions(env, nothing) - env.done = false + function GymEnv(name, pyenv, stateT=PyArray) + pystate = pycall(pyenv["reset"], PyObject) + state = convert(stateT, pystate) + env = new{typeof(state)}(name, pyenv, pyenv["step"], pyenv["reset"], + pystate, PyNULL(), PyNULL(), state) + reset!(env) + env + end end +GymEnv(name; stateT=PyArray) = gym(name; stateT=stateT) -function gym(name::AbstractString) +function gym(name::AbstractString; stateT=PyArray) env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0") Base.copy!(gym_soccer, pyimport("gym_soccer")) get!(_py_envs, name) do - GymEnv(name, pygym[:make](name)) + GymEnv(name, pygym[:make](name), stateT) end else - GymEnv(name, pygym[:make](name)) + GymEnv(name, pygym[:make](name), stateT) end reset!(env) env @@ -94,7 +98,6 @@ function actionset(A::PyObject) end end - function Reinforce.actions(env::AbstractGymEnv, s′) actionset(env.pyenv[:action_space]) end @@ -102,14 +105,61 @@ end pyaction(a::Vector) = Any[pyaction(ai) for ai=a] pyaction(a) = a -function Reinforce.step!(env::GymEnv, a) - # info("Going to take action: $a") +""" +`reset!` for PyArray state types +""" +function Reinforce.reset!(env::GymEnv{T}) where T <: PyArray + env.state = PyArray(pycall!(env.pystate, env.pyreset, PyObject))) + return gymreset!(env) +end + +""" +`reset!` for non PyArray state types +""" +function Reinforce.reset!(env::GymEnv{T}) where T + pycall!(env.pystate, env.pyreset, PyObject) + env.state = convert(T, env.pystate) + return gymreset!(env) +end + +function gymreset!(env::GymEnv{T}) where T + env.reward = 0.0 + env.total_reward = 0.0 + env.actions = actions(env, nothing) + env.done = false + return env.state +end + +""" +`step!` for PyArray state +""" +function Reinforce.step!(env::GymEnv{T}, a) where T <: PyArray + pyact = pyaction(a) + pycall!(env.pystepres, env.pystep, PyObject, pyact) + + 
env.pystate, r, env.done, env.info = + convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) + + env.state = PyArray(env.pystate) + + env.total_reward += r + return (r, env.state) +end + +""" +step! for non-PyArray state +""" +function Reinforce.step!(env::GymEnv{T}, a) where T pyact = pyaction(a) - s′, r, env.done, env.info = env.pyenv[:step](pyact) - env.reward = r + pycall!(env.pystepres, env.pystep, PyObject, pyact) + + env.pystate, r, env.done, env.info = + convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) + + env.state = convert(T, env.pystate) + env.total_reward += r - env.state = s′ - r, s′ + return (r, env.state) end Reinforce.finished(env::GymEnv) = env.done diff --git a/test/runtests.jl b/test/runtests.jl index d8b973b..b3f92c0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -40,7 +40,7 @@ end pacman = GymEnv("MsPacman-v4") pacmannf = GymEnv("MsPacmanNoFrameskip-v4") cartpole = GymEnv("CartPole-v0") - bj = GymEnv("Blackjack-v0") + bj = GymEnv("Blackjack-v0", stateT=PyAny) allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] eps2trial = Dict(pong=>2, pongnf=>1, pacman=>2, pacmannf=>1, cartpole=>400, bj=>30000) From 209240bc89881ae3489d51fa32e143c2ff620b29 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 19 Sep 2018 01:40:51 +1000 Subject: [PATCH 10/13] Clean up constructors and fix for 0.6 --- src/OpenAIGym.jl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index baf8fc3..17d09b8 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -36,18 +36,15 @@ mutable struct GymEnv{T} <: AbstractGymEnv total_reward::Float64 actions::AbstractSet done::Bool - function GymEnv(name, pyenv, stateT=PyArray) - pystate = pycall(pyenv["reset"], PyObject) - state = convert(stateT, pystate) - env = new{typeof(state)}(name, pyenv, pyenv["step"], pyenv["reset"], + function GymEnv{T}(name, pyenv, pystate, state) where T + env = new{T}(name, pyenv, pyenv["step"], pyenv["reset"], pystate, PyNULL(), PyNULL(), state) reset!(env) env end end -GymEnv(name; stateT=PyArray) = gym(name; stateT=stateT) -function gym(name::AbstractString; stateT=PyArray) +function GymEnv(name; stateT=PyArray) env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0") Base.copy!(gym_soccer, pyimport("gym_soccer")) get!(_py_envs, name) do @@ -60,6 +57,13 @@ function gym(name::AbstractString; stateT=PyArray) env end +function GymEnv(name, pyenv, stateT) + pystate = pycall(pyenv["reset"], PyObject) + state = convert(stateT, pystate) + T = typeof(state) + GymEnv{T}(name, pyenv, pystate, state) +end + # -------------------------------------------------------------- @@ -109,7 +113,7 @@ pyaction(a) = a `reset!` for PyArray state types """ function Reinforce.reset!(env::GymEnv{T}) where T <: PyArray - env.state = PyArray(pycall!(env.pystate, env.pyreset, PyObject))) + env.state = PyArray(pycall!(env.pystate, env.pyreset, PyObject)) return gymreset!(env) end From ad5d3f20aad73e5215f599f9849f6efda928aaba Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 02:17:00 +1000 Subject: [PATCH 11/13] Use setdata! 
--- src/OpenAIGym.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index 17d09b8..d20ba85 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -113,7 +113,7 @@ pyaction(a) = a `reset!` for PyArray state types """ function Reinforce.reset!(env::GymEnv{T}) where T <: PyArray - env.state = PyArray(pycall!(env.pystate, env.pyreset, PyObject)) + setdata!(env.state, pycall!(env.pystate, env.pyreset, PyObject)) return gymreset!(env) end @@ -144,7 +144,7 @@ function Reinforce.step!(env::GymEnv{T}, a) where T <: PyArray env.pystate, r, env.done, env.info = convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) - env.state = PyArray(env.pystate) + setdata!(env.state, env.pystate) env.total_reward += r return (r, env.state) From 77bea11a692e9792e0d1982e78c591d4a79eba0c Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 02:32:15 +1000 Subject: [PATCH 12/13] Use `unsafe_gettpl!` to speed up access to results of env.step() --- src/OpenAIGym.jl | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index d20ba85..a76e914 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -30,6 +30,7 @@ mutable struct GymEnv{T} <: AbstractGymEnv pyreset::PyObject # the python env.reset function pystate::PyObject # the state array object referenced by the PyArray state.o pystepres::PyObject # used to make stepping the env slightly more efficient + pytplres::PyObject # used to make stepping the env slightly more efficient info::PyObject # store it as a PyObject for speed, since often unused state::T reward::Float64 @@ -141,13 +142,10 @@ function Reinforce.step!(env::GymEnv{T}, a) where T <: PyArray pyact = pyaction(a) pycall!(env.pystepres, env.pystep, PyObject, pyact) - env.pystate, r, env.done, env.info = - convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) - + unsafe_gettpl!(env.pystate, env.pystepres, PyObject, 0) setdata!(env.state, env.pystate) - env.total_reward += r - return (r, env.state) + return gymstep!(env) end """ @@ -157,11 +155,16 @@ function Reinforce.step!(env::GymEnv{T}, a) where T pyact = pyaction(a) pycall!(env.pystepres, env.pystep, PyObject, pyact) - env.pystate, r, env.done, env.info = - convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) - + unsafe_gettpl!(env.pystate, env.pystepres, PyObject, 0) env.state = convert(T, env.pystate) + return gymstep!(env) +end + +@inline function gymstep!(env) + r = unsafe_gettpl!(env.pytplres, env.pystepres, Float64, 1) + env.done = unsafe_gettpl!(env.pytplres, env.pystepres, Bool, 2) + unsafe_gettpl!(env.info, env.pystepres, PyObject, 3) env.total_reward += r return (r, env.state) end From 7af1c05f8ed93b5c25eae2e5f0d8552c718a8622 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 5 Oct 2018 03:21:16 +1000 Subject: [PATCH 13/13] Add PyNULL() for pytplres in GymEnv Constructor oopsie daisy : D - was quite broken without this --- src/OpenAIGym.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index a76e914..7938bb0 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -11,7 +11,7 @@ import Reinforce: KeyboardAction, KeyboardActionSet export - gym, + pygym, GymEnv, test_env, PyAny @@ -37,9 +37,9 @@ mutable struct GymEnv{T} <: AbstractGymEnv total_reward::Float64 actions::AbstractSet done::Bool - function GymEnv{T}(name, pyenv, pystate, state) where T + function GymEnv{T}(name, pyenv, pystate, state::T) where T env = 
new{T}(name, pyenv, pyenv["step"], pyenv["reset"], - pystate, PyNULL(), PyNULL(), state) + pystate, PyNULL(), PyNULL(), PyNULL(), state) reset!(env) env end @@ -54,7 +54,6 @@ function GymEnv(name; stateT=PyArray) else GymEnv(name, pygym[:make](name), stateT) end - reset!(env) env end
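
A minimal usage sketch of the wrapper as it ends up after this series: `GymEnv(name; stateT=PyArray)` construction, the two-argument `step!(env, a)` from PATCH 04, and `env.total_reward` accumulated since the last `reset!`. The environment names and the random-policy loop below are illustrative only, assuming a working PyCall/gym installation:

using OpenAIGym

env = GymEnv("CartPole-v0")                   # PyArray-backed state by default
# env = GymEnv("Blackjack-v0", stateT=PyAny)  # for non-array observations

for ep in 1:3
    reset!(env)                 # also zeroes env.reward and env.total_reward
    while !env.done
        a = rand(env.actions)   # sample from the wrapped action_space
        r, s′ = step!(env, a)   # returns (reward, next state)
    end
    println("episode $ep total reward: $(env.total_reward)")
end

The PyArray default is the speed path: PATCHES 09-12 reuse the underlying Python objects via `pycall!`, `setdata!` and `unsafe_gettpl!`, so stepping avoids re-converting the observation and the step tuple on every call; pass `stateT=PyAny` (as the Blackjack test does) when the observation is not a numeric array.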