From ef347108d882d82b5dc69935eca1ad7d0b49b087 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 17 Nov 2017 17:09:13 +1100 Subject: [PATCH 01/13] Adds env.total_reward as total reward since last reset! Plus: * update abstract type syntax for 0.6+ * remove a non-universal linux workaround --- src/OpenAIGym.jl | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index 9e8353a..a037aa0 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -19,7 +19,7 @@ const _py_envs = Dict{String,Any}() # -------------------------------------------------------------- -abstract AbstractGymEnv <: AbstractEnvironment +abstract type AbstractGymEnv <: AbstractEnvironment end "A simple wrapper around the OpenAI gym environments to add to the Reinforce framework" type GymEnv <: AbstractGymEnv @@ -27,6 +27,7 @@ type GymEnv <: AbstractGymEnv pyenv # the python "env" object state reward::Float64 + total_reward::Float64 actions::AbstractSet done::Bool info::Dict @@ -37,6 +38,7 @@ GymEnv(name) = gym(name) function Reinforce.reset!(env::GymEnv) env.state = env.pyenv[:reset]() env.reward = 0.0 + env.total_reward = 0.0 env.actions = actions(env, nothing) env.done = false end @@ -47,6 +49,7 @@ type UniverseEnv <: AbstractGymEnv pyenv # the python "env" object state reward + total_reward actions::AbstractSet done info::Dict @@ -57,6 +60,7 @@ UniverseEnv(name) = gym(name) function Reinforce.reset!(env::UniverseEnv) env.state = env.pyenv[:reset]() env.reward = [0.0] + env.total_reward = 0.0 env.actions = actions(env, nothing) env.done = [false] end @@ -92,7 +96,8 @@ end # -------------------------------------------------------------- -render(env::AbstractGymEnv, args...) = env.pyenv[:render]() +render(env::AbstractGymEnv, args...; kwargs...) = + pycall(env.pyenv[:render], PyAny; kwargs...) 
# -------------------------------------------------------------- @@ -148,20 +153,26 @@ function Reinforce.step!(env::GymEnv, s, a) # info("Going to take action: $a") pyact = pyaction(a) s′, r, env.done, env.info = env.pyenv[:step](pyact) - env.reward, env.state = r, s′ + env.reward = r + env.total_reward += r + env.state = s′ + r, s′ end function Reinforce.step!(env::UniverseEnv, s, a) info("Going to take action: $a") pyact = Any[pyaction(a)] s′, r, env.done, env.info = env.pyenv[:step](pyact) - env.reward, env.state = r, s′ + env.reward = r + env.total_reward += r[1] # assuming it's an array based on `reset!` + env.state = s′ + r, s′ end +Reinforce.finished(env::GymEnv) = env.done Reinforce.finished(env::GymEnv, s′) = env.done Reinforce.finished(env::UniverseEnv, s′) = all(env.done) - # -------------------------------------------------------------- @@ -175,12 +186,12 @@ end function __init__() - @static if is_linux() - # due to a ssl library bug, I have to first load the ssl lib here - condadir = Pkg.dir("Conda","deps","usr","lib") - Libdl.dlopen(joinpath(condadir, "libssl.so")) - Libdl.dlopen(joinpath(condadir, "python2.7", "lib-dynload", "_ssl.so")) - end + # @static if is_linux() + # # due to a ssl library bug, I have to first load the ssl lib here + # condadir = Pkg.dir("Conda","deps","usr","lib") + # Libdl.dlopen(joinpath(condadir, "libssl.so")) + # Libdl.dlopen(joinpath(condadir, "python2.7", "lib-dynload", "_ssl.so")) + # end global const pygym = pyimport("gym") end From 670eff8d164206763f30896f85dc9dbc0e78861a Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 00:04:22 +1000 Subject: [PATCH 02/13] Remove Universe AFAICT Universe is not maintained by OpenAI anymore, and should probably have it's own file/project anyway to reduce clutter --- src/OpenAIGym.jl | 65 ------------------------------------------------ 1 file changed, 65 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index a037aa0..560746d 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -43,49 +43,12 @@ function Reinforce.reset!(env::GymEnv) env.done = false end -"A simple wrapper around the OpenAI gym environments to add to the Reinforce framework" -type UniverseEnv <: AbstractGymEnv - name::String - pyenv # the python "env" object - state - reward - total_reward - actions::AbstractSet - done - info::Dict - UniverseEnv(name,pyenv) = new(name,pyenv) -end -UniverseEnv(name) = gym(name) - -function Reinforce.reset!(env::UniverseEnv) - env.state = env.pyenv[:reset]() - env.reward = [0.0] - env.total_reward = 0.0 - env.actions = actions(env, nothing) - env.done = [false] -end - function gym(name::AbstractString) env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0") @pyimport gym_soccer get!(_py_envs, name) do GymEnv(name, pygym[:make](name)) end - elseif split(name, ".")[1] in ("flashgames", "wob") - @pyimport universe - @pyimport universe.wrappers as wrappers - if !isdefined(OpenAIGym, :vnc_event) - global const vnc_event = PyCall.pywrap(PyCall.pyimport("universe.spaces.vnc_event")) - end - get!(_py_envs, name) do - pyenv = wrappers.SafeActionSpace(pygym[:make](name)) - pyenv[:configure](remotes=1) # automatically creates a local docker container - # pyenv[:configure](remotes="vnc://localhost:5900+15900") - o = UniverseEnv(name, pyenv) - # finalizer(o, o.pyenv[:close]()) - sleep(2) - o - end else GymEnv(name, pygym[:make](name)) end @@ -121,14 +84,6 @@ function actionset(A::PyObject) # # error("Unsupported shape for IntervalSet: $(A[:shape])") # [IntervalSet{Float64}(lo[i], hi[i]) for 
i=1:length(lo)] # end - elseif haskey(A, :buttonmasks) - # assumed VNC actions... keys to press, buttons to mask, and screen position - # keyboard = DiscreteSet(A[:keys]) - keyboard = KeyboardActionSet(A[:keys]) - buttons = DiscreteSet(Int[bm for bm in A[:buttonmasks]]) - width,height = A[:screen_shape] - mouse = MouseActionSet(width, height, buttons) - TupleSet(keyboard, mouse) elseif haskey(A, :actions) # Hardcoded TupleSet(DiscreteSet(A[:actions])) @@ -145,8 +100,6 @@ function Reinforce.actions(env::AbstractGymEnv, s′) end pyaction(a::Vector) = Any[pyaction(ai) for ai=a] -pyaction(a::KeyboardAction) = Any[a.key] -pyaction(a::MouseAction) = Any[vnc_event.PointerEvent(a.x, a.y, a.button)] pyaction(a) = a function Reinforce.step!(env::GymEnv, s, a) @@ -159,19 +112,8 @@ function Reinforce.step!(env::GymEnv, s, a) r, s′ end -function Reinforce.step!(env::UniverseEnv, s, a) - info("Going to take action: $a") - pyact = Any[pyaction(a)] - s′, r, env.done, env.info = env.pyenv[:step](pyact) - env.reward = r - env.total_reward += r[1] # assuming it's an array based on `reset!` - env.state = s′ - r, s′ -end - Reinforce.finished(env::GymEnv) = env.done Reinforce.finished(env::GymEnv, s′) = env.done -Reinforce.finished(env::UniverseEnv, s′) = all(env.done) # -------------------------------------------------------------- @@ -186,13 +128,6 @@ end function __init__() - # @static if is_linux() - # # due to a ssl library bug, I have to first load the ssl lib here - # condadir = Pkg.dir("Conda","deps","usr","lib") - # Libdl.dlopen(joinpath(condadir, "libssl.so")) - # Libdl.dlopen(joinpath(condadir, "python2.7", "lib-dynload", "_ssl.so")) - # end - global const pygym = pyimport("gym") end From 3b5ed5da5c67f76e984ee887e7fa7db578b51ab8 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 00:25:44 +1000 Subject: [PATCH 03/13] 1.0 support --- src/OpenAIGym.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index 560746d..36f7d37 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -22,7 +22,7 @@ const _py_envs = Dict{String,Any}() abstract type AbstractGymEnv <: AbstractEnvironment end "A simple wrapper around the OpenAI gym environments to add to the Reinforce framework" -type GymEnv <: AbstractGymEnv +mutable struct GymEnv <: AbstractGymEnv name::String pyenv # the python "env" object state @@ -45,7 +45,7 @@ end function gym(name::AbstractString) env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0") - @pyimport gym_soccer + Base.copy!(gym_soccer, pyimport("gym_soccer")) get!(_py_envs, name) do GymEnv(name, pygym[:make](name)) end @@ -127,8 +127,12 @@ end +global const pygym = PyNULL() +global const pysoccer = PyNULL() + function __init__() - global const pygym = pyimport("gym") + # the copy! 
puts the gym module into `pygym`, handling python ref-counting + Base.copy!(pygym, pyimport("gym")) end end # module From fca8d08c686702f7b7eba5b10676d1887edf03ce Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 17:23:13 +1000 Subject: [PATCH 04/13] Remove state param from `step!(env, ...)` --- src/OpenAIGym.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index 36f7d37..b6ce2a5 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -102,7 +102,7 @@ end pyaction(a::Vector) = Any[pyaction(ai) for ai=a] pyaction(a) = a -function Reinforce.step!(env::GymEnv, s, a) +function Reinforce.step!(env::GymEnv, a) # info("Going to take action: $a") pyact = pyaction(a) s′, r, env.done, env.info = env.pyenv[:step](pyact) From 1b9733bfbdf76108424d3011fb6fd1b770f27955 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 00:55:40 +1000 Subject: [PATCH 05/13] Add tests that double as benchmarks --- test/runtests.jl | 141 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 132 insertions(+), 9 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 143398f..a55aaad 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,13 +1,136 @@ -using OpenAIGym -using Base.Test +using OpenAIGym, Compat.Test +using PyCall -# write your own tests here -@test 1 == 1 +""" +`function time_steps(env::GymEnv{T}, num_eps::Int) where T` -if isinteractive() - env = GymEnv("CartPole-v0") - for i=1:5 - R = run_episode(()->nothing, env, RandomPolicy()) - info("Episode $i finished. Total reward: $R") +run through num_eps eps, recording the time taken for each step and +how many steps were made. Doesn't time the `reset!` or the first step of each +episode (since higher chance that it's slower/faster than the rest, and we want +to compare the average time taken for each step as fairly as possible) +""" +# function time_steps(env::GymEnv{T}, num_eps::Int) where T +function time_steps(env::GymEnv, num_eps::Int) + t = 0.0 + steps = 0 + for i in 1:num_eps + reset!(env) + step!(env, rand(env.actions)) # ignore the first step - it might be slow? 
+ t += (@elapsed steps += epstep(env)) + end + steps, t +end + +""" +Steps through an episode until it's `done` +assumes env has been `reset!` +""" +# function epstep(env::GymEnv{T}) where T +function epstep(env::GymEnv) + steps = 0 + while !env.done + steps += 1 + r, s = step!(env, rand(env.actions)) + end + steps +end + +@testset "Gym Basics" begin + + pong = GymEnv("Pong-v4") + pongnf = GymEnv("PongNoFrameskip-v4") + pacman = GymEnv("MsPacman-v4") + pacmannf = GymEnv("MsPacmanNoFrameskip-v4") + cartpole = GymEnv("CartPole-v0") + bj = GymEnv("Blackjack-v0") + + allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] + eps2trial = Dict(pong=>4, pongnf=>4, pacman=>9, pacmannf=>9, cartpole=>5000, bj=>30000) + atarienvs = [pong, pongnf, pacman, pacmannf] + envs = allenvs + + @testset "envs load" begin + # check they all work - no errors == no worries + println("------------------------------ Check envs load ------------------------------") + for (i, env) in enumerate(envs) + @show env.name env.pyenv a = rand(env.actions)|>OpenAIGym.pyaction PyObject(a)|>pytypeof + time_steps(env, 1) + @test !ispynull(env.pyenv) + println("------------------------------") + end + end + + @testset "julia speed test" begin + println("------------------------------ Begin Julia Speed Check ------------------------------") + for env in envs + num_eps = eps2trial[env] + steps, t = time_steps(env, num_eps) + @show env.name num_eps t steps + println("microsecs/step (lower is better): ", t*1e6/steps) + println("------------------------------") + end + println("------------------------------ End Julia Speed Check ------------------------------\n") + end + + @testset "python speed test" begin + println("------------------------------ Begin Python Speed Check ------------------------------") + py""" + import gym + import numpy as np + + pong = gym.make("Pong-v4") + pongnf = gym.make("PongNoFrameskip-v4") + pacman = gym.make("MsPacman-v4"); + pacmannf = gym.make("MsPacmanNoFrameskip-v4"); + cartpole = gym.make("CartPole-v0") + bj = gym.make("Blackjack-v0") + + allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] + eps2trial = {pong: 4, pongnf: 4, pacman: 9, pacmannf: 9, cartpole: 5000, bj: 30000} + atarienvs = [pong, pongnf, pacman, pacmannf]; + + envs = allenvs + + import time + class Timer(object): + elapsed = 0.0 + def __init__(self, name=None): + self.name = name + + def __enter__(self): + self.tstart = time.time() + + def __exit__(self, type, value, traceback): + Timer.elapsed = time.time() - self.tstart + + def time_steps(env, num_eps): + t = 0.0 + steps = 0 + for i in range(num_eps): + env.reset() + with Timer(): + steps += epstep(env) + t += Timer.elapsed + return steps, t + + def epstep(env): + steps = 0 + while True: + steps += 1 + action = env.action_space.sample() + state, reward, done, info = env.step(action) + if done == True: + break + return steps + + for env in envs: + num_eps = eps2trial[env] + with Timer(): + steps, s = time_steps(env, num_eps) + t = Timer.elapsed + print(f"{env} num_eps: {num_eps} t: {t} steps: {steps} \n microsecs/step (lower is better): {t*1e6/steps}") + print("------------------------------") + """ + println("------------------------------ End Python Speed Check ------------------------------") end end From 2152f2239e1c8d989e14d457a3eea9b081209518 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 19 Sep 2018 01:38:38 +1000 Subject: [PATCH 06/13] Reduce num episodes for slow envs to speed up tests And improve test output formatting --- test/runtests.jl | 10 +++++----- 1 file 
changed, 5 insertions(+), 5 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index a55aaad..c86fa40 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,7 +9,6 @@ how many steps were made. Doesn't time the `reset!` or the first step of each episode (since higher chance that it's slower/faster than the rest, and we want to compare the average time taken for each step as fairly as possible) """ -# function time_steps(env::GymEnv{T}, num_eps::Int) where T function time_steps(env::GymEnv, num_eps::Int) t = 0.0 steps = 0 @@ -25,7 +24,6 @@ end Steps through an episode until it's `done` assumes env has been `reset!` """ -# function epstep(env::GymEnv{T}) where T function epstep(env::GymEnv) steps = 0 while !env.done @@ -45,7 +43,7 @@ end bj = GymEnv("Blackjack-v0") allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] - eps2trial = Dict(pong=>4, pongnf=>4, pacman=>9, pacmannf=>9, cartpole=>5000, bj=>30000) + eps2trial = Dict(pong=>1, pongnf=>1, pacman=>2, pacmannf=>2, cartpole=>100, bj=>30000) atarienvs = [pong, pongnf, pacman, pacmannf] envs = allenvs @@ -53,7 +51,9 @@ end # check they all work - no errors == no worries println("------------------------------ Check envs load ------------------------------") for (i, env) in enumerate(envs) - @show env.name env.pyenv a = rand(env.actions)|>OpenAIGym.pyaction PyObject(a)|>pytypeof + a = rand(env.actions) |> OpenAIGym.pyaction + action_type = a |> PyObject |> pytypeof + println("env.pyenv: $(env.pyenv) action_type: $action_type ex: $a") time_steps(env, 1) @test !ispynull(env.pyenv) println("------------------------------") @@ -65,7 +65,7 @@ end for env in envs num_eps = eps2trial[env] steps, t = time_steps(env, num_eps) - @show env.name num_eps t steps + println("env.pyenv: $(env.pyenv) num_eps: $num_eps t: $t steps: $steps") println("microsecs/step (lower is better): ", t*1e6/steps) println("------------------------------") end From 32b64becdcc6b645450b8422234664007f9a76bf Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 19 Sep 2018 02:09:25 +1000 Subject: [PATCH 07/13] Run envs the same number of eps and steps in each lang --- test/runtests.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index c86fa40..6ada7bd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,7 +14,7 @@ function time_steps(env::GymEnv, num_eps::Int) steps = 0 for i in 1:num_eps reset!(env) - step!(env, rand(env.actions)) # ignore the first step - it might be slow? + # step!(env, rand(env.actions)) # ignore the first step - it might be slow? 
t += (@elapsed steps += epstep(env)) end steps, t @@ -43,7 +43,7 @@ end bj = GymEnv("Blackjack-v0") allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] - eps2trial = Dict(pong=>1, pongnf=>1, pacman=>2, pacmannf=>2, cartpole=>100, bj=>30000) + eps2trial = Dict(pong=>2, pongnf=>1, pacman=>2, pacmannf=>1, cartpole=>400, bj=>30000) atarienvs = [pong, pongnf, pacman, pacmannf] envs = allenvs @@ -86,7 +86,7 @@ end bj = gym.make("Blackjack-v0") allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] - eps2trial = {pong: 4, pongnf: 4, pacman: 9, pacmannf: 9, cartpole: 5000, bj: 30000} + eps2trial = {pong: 2, pongnf: 1, pacman: 2, pacmannf: 1, cartpole: 400, bj: 30000} atarienvs = [pong, pongnf, pacman, pacmannf]; envs = allenvs From 8a9d1b2a0ddf7ec7a5d5448f7edf4476c19705db Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 19 Sep 2018 02:15:00 +1000 Subject: [PATCH 08/13] Remove unused code --- src/OpenAIGym.jl | 10 ---------- test/runtests.jl | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index b6ce2a5..fd09171 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -117,16 +117,6 @@ Reinforce.finished(env::GymEnv, s′) = env.done # -------------------------------------------------------------- - -function test_env(name::String = "CartPole-v0") - env = gym(name) - for sars′ in Episode(env, RandomPolicy()) - render(env) - end -end - - - global const pygym = PyNULL() global const pysoccer = PyNULL() diff --git a/test/runtests.jl b/test/runtests.jl index 6ada7bd..d8b973b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -53,7 +53,7 @@ end for (i, env) in enumerate(envs) a = rand(env.actions) |> OpenAIGym.pyaction action_type = a |> PyObject |> pytypeof - println("env.pyenv: $(env.pyenv) action_type: $action_type ex: $a") + println("env.pyenv: $(env.pyenv) action_type: $action_type (e.g. 
$a)") time_steps(env, 1) @test !ispynull(env.pyenv) println("------------------------------") From a42b8da8cb9f921911a3f94bc4d75ce8a5d9813f Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 01:45:42 +1000 Subject: [PATCH 09/13] Add state type param to env for speed --- src/OpenAIGym.jl | 100 +++++++++++++++++++++++++++++++++++------------ test/runtests.jl | 2 +- 2 files changed, 76 insertions(+), 26 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index fd09171..baf8fc3 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -13,7 +13,8 @@ import Reinforce: export gym, GymEnv, - test_env + test_env, + PyAny const _py_envs = Dict{String,Any}() @@ -22,35 +23,38 @@ const _py_envs = Dict{String,Any}() abstract type AbstractGymEnv <: AbstractEnvironment end "A simple wrapper around the OpenAI gym environments to add to the Reinforce framework" -mutable struct GymEnv <: AbstractGymEnv +mutable struct GymEnv{T} <: AbstractGymEnv name::String - pyenv # the python "env" object - state + pyenv::PyObject # the python "env" object + pystep::PyObject # the python env.step function + pyreset::PyObject # the python env.reset function + pystate::PyObject # the state array object referenced by the PyArray state.o + pystepres::PyObject # used to make stepping the env slightly more efficient + info::PyObject # store it as a PyObject for speed, since often unused + state::T reward::Float64 total_reward::Float64 actions::AbstractSet done::Bool - info::Dict - GymEnv(name,pyenv) = new(name,pyenv) -end -GymEnv(name) = gym(name) - -function Reinforce.reset!(env::GymEnv) - env.state = env.pyenv[:reset]() - env.reward = 0.0 - env.total_reward = 0.0 - env.actions = actions(env, nothing) - env.done = false + function GymEnv(name, pyenv, stateT=PyArray) + pystate = pycall(pyenv["reset"], PyObject) + state = convert(stateT, pystate) + env = new{typeof(state)}(name, pyenv, pyenv["step"], pyenv["reset"], + pystate, PyNULL(), PyNULL(), state) + reset!(env) + env + end end +GymEnv(name; stateT=PyArray) = gym(name; stateT=stateT) -function gym(name::AbstractString) +function gym(name::AbstractString; stateT=PyArray) env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0") Base.copy!(gym_soccer, pyimport("gym_soccer")) get!(_py_envs, name) do - GymEnv(name, pygym[:make](name)) + GymEnv(name, pygym[:make](name), stateT) end else - GymEnv(name, pygym[:make](name)) + GymEnv(name, pygym[:make](name), stateT) end reset!(env) env @@ -94,7 +98,6 @@ function actionset(A::PyObject) end end - function Reinforce.actions(env::AbstractGymEnv, s′) actionset(env.pyenv[:action_space]) end @@ -102,14 +105,61 @@ end pyaction(a::Vector) = Any[pyaction(ai) for ai=a] pyaction(a) = a -function Reinforce.step!(env::GymEnv, a) - # info("Going to take action: $a") +""" +`reset!` for PyArray state types +""" +function Reinforce.reset!(env::GymEnv{T}) where T <: PyArray + env.state = PyArray(pycall!(env.pystate, env.pyreset, PyObject))) + return gymreset!(env) +end + +""" +`reset!` for non PyArray state types +""" +function Reinforce.reset!(env::GymEnv{T}) where T + pycall!(env.pystate, env.pyreset, PyObject) + env.state = convert(T, env.pystate) + return gymreset!(env) +end + +function gymreset!(env::GymEnv{T}) where T + env.reward = 0.0 + env.total_reward = 0.0 + env.actions = actions(env, nothing) + env.done = false + return env.state +end + +""" +`step!` for PyArray state +""" +function Reinforce.step!(env::GymEnv{T}, a) where T <: PyArray + pyact = pyaction(a) + pycall!(env.pystepres, env.pystep, PyObject, pyact) + + 
env.pystate, r, env.done, env.info = + convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) + + env.state = PyArray(env.pystate) + + env.total_reward += r + return (r, env.state) +end + +""" +step! for non-PyArray state +""" +function Reinforce.step!(env::GymEnv{T}, a) where T pyact = pyaction(a) - s′, r, env.done, env.info = env.pyenv[:step](pyact) - env.reward = r + pycall!(env.pystepres, env.pystep, PyObject, pyact) + + env.pystate, r, env.done, env.info = + convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) + + env.state = convert(T, env.pystate) + env.total_reward += r - env.state = s′ - r, s′ + return (r, env.state) end Reinforce.finished(env::GymEnv) = env.done diff --git a/test/runtests.jl b/test/runtests.jl index d8b973b..b3f92c0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -40,7 +40,7 @@ end pacman = GymEnv("MsPacman-v4") pacmannf = GymEnv("MsPacmanNoFrameskip-v4") cartpole = GymEnv("CartPole-v0") - bj = GymEnv("Blackjack-v0") + bj = GymEnv("Blackjack-v0", stateT=PyAny) allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj] eps2trial = Dict(pong=>2, pongnf=>1, pacman=>2, pacmannf=>1, cartpole=>400, bj=>30000) From 209240bc89881ae3489d51fa32e143c2ff620b29 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 19 Sep 2018 01:40:51 +1000 Subject: [PATCH 10/13] Clean up constructors and fix for 0.6 --- src/OpenAIGym.jl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index baf8fc3..17d09b8 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -36,18 +36,15 @@ mutable struct GymEnv{T} <: AbstractGymEnv total_reward::Float64 actions::AbstractSet done::Bool - function GymEnv(name, pyenv, stateT=PyArray) - pystate = pycall(pyenv["reset"], PyObject) - state = convert(stateT, pystate) - env = new{typeof(state)}(name, pyenv, pyenv["step"], pyenv["reset"], + function GymEnv{T}(name, pyenv, pystate, state) where T + env = new{T}(name, pyenv, pyenv["step"], pyenv["reset"], pystate, PyNULL(), PyNULL(), state) reset!(env) env end end -GymEnv(name; stateT=PyArray) = gym(name; stateT=stateT) -function gym(name::AbstractString; stateT=PyArray) +function GymEnv(name; stateT=PyArray) env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0") Base.copy!(gym_soccer, pyimport("gym_soccer")) get!(_py_envs, name) do @@ -60,6 +57,13 @@ function gym(name::AbstractString; stateT=PyArray) env end +function GymEnv(name, pyenv, stateT) + pystate = pycall(pyenv["reset"], PyObject) + state = convert(stateT, pystate) + T = typeof(state) + GymEnv{T}(name, pyenv, pystate, state) +end + # -------------------------------------------------------------- @@ -109,7 +113,7 @@ pyaction(a) = a `reset!` for PyArray state types """ function Reinforce.reset!(env::GymEnv{T}) where T <: PyArray - env.state = PyArray(pycall!(env.pystate, env.pyreset, PyObject))) + env.state = PyArray(pycall!(env.pystate, env.pyreset, PyObject)) return gymreset!(env) end From ad5d3f20aad73e5215f599f9849f6efda928aaba Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 02:17:00 +1000 Subject: [PATCH 11/13] Use setdata! 
--- src/OpenAIGym.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index 17d09b8..d20ba85 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -113,7 +113,7 @@ pyaction(a) = a `reset!` for PyArray state types """ function Reinforce.reset!(env::GymEnv{T}) where T <: PyArray - env.state = PyArray(pycall!(env.pystate, env.pyreset, PyObject)) + setdata!(env.state, pycall!(env.pystate, env.pyreset, PyObject)) return gymreset!(env) end @@ -144,7 +144,7 @@ function Reinforce.step!(env::GymEnv{T}, a) where T <: PyArray env.pystate, r, env.done, env.info = convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) - env.state = PyArray(env.pystate) + setdata!(env.state, env.pystate) env.total_reward += r return (r, env.state) From 77bea11a692e9792e0d1982e78c591d4a79eba0c Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 14 Sep 2018 02:32:15 +1000 Subject: [PATCH 12/13] Use `unsafe_gettpl!` to speed up access to results of env.step() --- src/OpenAIGym.jl | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index d20ba85..a76e914 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -30,6 +30,7 @@ mutable struct GymEnv{T} <: AbstractGymEnv pyreset::PyObject # the python env.reset function pystate::PyObject # the state array object referenced by the PyArray state.o pystepres::PyObject # used to make stepping the env slightly more efficient + pytplres::PyObject # used to make stepping the env slightly more efficient info::PyObject # store it as a PyObject for speed, since often unused state::T reward::Float64 @@ -141,13 +142,10 @@ function Reinforce.step!(env::GymEnv{T}, a) where T <: PyArray pyact = pyaction(a) pycall!(env.pystepres, env.pystep, PyObject, pyact) - env.pystate, r, env.done, env.info = - convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) - + unsafe_gettpl!(env.pystate, env.pystepres, PyObject, 0) setdata!(env.state, env.pystate) - env.total_reward += r - return (r, env.state) + return gymstep!(env) end """ @@ -157,11 +155,16 @@ function Reinforce.step!(env::GymEnv{T}, a) where T pyact = pyaction(a) pycall!(env.pystepres, env.pystep, PyObject, pyact) - env.pystate, r, env.done, env.info = - convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres) - + unsafe_gettpl!(env.pystate, env.pystepres, PyObject, 0) env.state = convert(T, env.pystate) + return gymstep!(env) +end + +@inline function gymstep!(env) + r = unsafe_gettpl!(env.pytplres, env.pystepres, Float64, 1) + env.done = unsafe_gettpl!(env.pytplres, env.pystepres, Bool, 2) + unsafe_gettpl!(env.info, env.pystepres, PyObject, 3) env.total_reward += r return (r, env.state) end From 7af1c05f8ed93b5c25eae2e5f0d8552c718a8622 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Fri, 5 Oct 2018 03:21:16 +1000 Subject: [PATCH 13/13] Add PyNULL() for pytplres in GymEnv Constructor oopsie daisy : D - was quite broken without this --- src/OpenAIGym.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl index a76e914..7938bb0 100644 --- a/src/OpenAIGym.jl +++ b/src/OpenAIGym.jl @@ -11,7 +11,7 @@ import Reinforce: KeyboardAction, KeyboardActionSet export - gym, + pygym, GymEnv, test_env, PyAny @@ -37,9 +37,9 @@ mutable struct GymEnv{T} <: AbstractGymEnv total_reward::Float64 actions::AbstractSet done::Bool - function GymEnv{T}(name, pyenv, pystate, state) where T + function GymEnv{T}(name, pyenv, pystate, state::T) where T env = 
new{T}(name, pyenv, pyenv["step"], pyenv["reset"], - pystate, PyNULL(), PyNULL(), state) + pystate, PyNULL(), PyNULL(), PyNULL(), state) reset!(env) env end @@ -54,7 +54,6 @@ function GymEnv(name; stateT=PyArray) else GymEnv(name, pygym[:make](name), stateT) end - reset!(env) env end
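
A minimal usage sketch of the wrapper as it ends up after this series: `GymEnv(name; stateT=PyArray)` construction, the two-argument `step!(env, a)` from PATCH 04, and `env.total_reward` accumulated since the last `reset!`. The environment names and the random-policy loop below are illustrative only, assuming a working PyCall/gym installation:

using OpenAIGym

env = GymEnv("CartPole-v0")                   # PyArray-backed state by default
# env = GymEnv("Blackjack-v0", stateT=PyAny)  # for non-array observations

for ep in 1:3
    reset!(env)                 # also zeroes env.reward and env.total_reward
    while !env.done
        a = rand(env.actions)   # sample from the wrapped action_space
        r, s′ = step!(env, a)   # returns (reward, next state)
    end
    println("episode $ep total reward: $(env.total_reward)")
end

The PyArray default is the speed path: PATCHES 09-12 reuse the underlying Python objects via `pycall!`, `setdata!` and `unsafe_gettpl!`, so stepping avoids re-converting the observation and the step tuple on every call; pass `stateT=PyAny` (as the Blackjack test does) when the observation is not a numeric array.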