src/OpenAIGym.jl: 90 additions & 94 deletions (184 changes)
@@ -11,88 +11,64 @@ import Reinforce:
KeyboardAction, KeyboardActionSet

export
gym,
pygym,
GymEnv,
test_env
test_env,
PyAny

const _py_envs = Dict{String,Any}()

# --------------------------------------------------------------

abstract AbstractGymEnv <: AbstractEnvironment
abstract type AbstractGymEnv <: AbstractEnvironment end

"A simple wrapper around the OpenAI gym environments to add to the Reinforce framework"
type GymEnv <: AbstractGymEnv
mutable struct GymEnv{T} <: AbstractGymEnv
name::String
pyenv # the python "env" object
state
pyenv::PyObject # the python "env" object
pystep::PyObject # the python env.step function
pyreset::PyObject # the python env.reset function
pystate::PyObject # the state array object referenced by the PyArray state.o
pystepres::PyObject # used to make stepping the env slightly more efficient
pytplres::PyObject # used to make stepping the env slightly more efficient
info::PyObject # store it as a PyObject for speed, since often unused
state::T
reward::Float64
total_reward::Float64
actions::AbstractSet
done::Bool
info::Dict
GymEnv(name,pyenv) = new(name,pyenv)
end
GymEnv(name) = gym(name)

function Reinforce.reset!(env::GymEnv)
env.state = env.pyenv[:reset]()
env.reward = 0.0
env.actions = actions(env, nothing)
env.done = false
end

"A simple wrapper around the OpenAI gym environments to add to the Reinforce framework"
type UniverseEnv <: AbstractGymEnv
name::String
pyenv # the python "env" object
state
reward
actions::AbstractSet
done
info::Dict
UniverseEnv(name,pyenv) = new(name,pyenv)
end
UniverseEnv(name) = gym(name)

function Reinforce.reset!(env::UniverseEnv)
env.state = env.pyenv[:reset]()
env.reward = [0.0]
env.actions = actions(env, nothing)
env.done = [false]
function GymEnv{T}(name, pyenv, pystate, state::T) where T
env = new{T}(name, pyenv, pyenv["step"], pyenv["reset"],
pystate, PyNULL(), PyNULL(), PyNULL(), state)
reset!(env)
env
end
end

function gym(name::AbstractString)
function GymEnv(name; stateT=PyArray)
env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0")
@pyimport gym_soccer
get!(_py_envs, name) do
GymEnv(name, pygym[:make](name))
end
elseif split(name, ".")[1] in ("flashgames", "wob")
@pyimport universe
@pyimport universe.wrappers as wrappers
if !isdefined(OpenAIGym, :vnc_event)
global const vnc_event = PyCall.pywrap(PyCall.pyimport("universe.spaces.vnc_event"))
end
Base.copy!(pysoccer, pyimport("gym_soccer"))
get!(_py_envs, name) do
pyenv = wrappers.SafeActionSpace(pygym[:make](name))
pyenv[:configure](remotes=1) # automatically creates a local docker container
# pyenv[:configure](remotes="vnc://localhost:5900+15900")
o = UniverseEnv(name, pyenv)
# finalizer(o, o.pyenv[:close]())
sleep(2)
o
GymEnv(name, pygym[:make](name), stateT)
end
else
GymEnv(name, pygym[:make](name))
GymEnv(name, pygym[:make](name), stateT)
end
reset!(env)
env
end

function GymEnv(name, pyenv, stateT)
pystate = pycall(pyenv["reset"], PyObject)
state = convert(stateT, pystate)
T = typeof(state)
GymEnv{T}(name, pyenv, pystate, state)
end


# --------------------------------------------------------------

render(env::AbstractGymEnv, args...) = env.pyenv[:render]()
render(env::AbstractGymEnv, args...; kwargs...) =
pycall(env.pyenv[:render], PyAny; kwargs...)

# --------------------------------------------------------------

@@ -116,14 +92,6 @@ function actionset(A::PyObject)
# # error("Unsupported shape for IntervalSet: $(A[:shape])")
# [IntervalSet{Float64}(lo[i], hi[i]) for i=1:length(lo)]
# end
elseif haskey(A, :buttonmasks)
# assumed VNC actions... keys to press, buttons to mask, and screen position
# keyboard = DiscreteSet(A[:keys])
keyboard = KeyboardActionSet(A[:keys])
buttons = DiscreteSet(Int[bm for bm in A[:buttonmasks]])
width,height = A[:screen_shape]
mouse = MouseActionSet(width, height, buttons)
TupleSet(keyboard, mouse)
elseif haskey(A, :actions)
# Hardcoded
TupleSet(DiscreteSet(A[:actions]))
@@ -134,55 +102,83 @@ function actionset(A::PyObject)
end
end


function Reinforce.actions(env::AbstractGymEnv, s′)
actionset(env.pyenv[:action_space])
end

pyaction(a::Vector) = Any[pyaction(ai) for ai=a]
pyaction(a::KeyboardAction) = Any[a.key]
pyaction(a::MouseAction) = Any[vnc_event.PointerEvent(a.x, a.y, a.button)]
pyaction(a) = a

function Reinforce.step!(env::GymEnv, s, a)
# info("Going to take action: $a")
pyact = pyaction(a)
s′, r, env.done, env.info = env.pyenv[:step](pyact)
env.reward, env.state = r, s′
"""
`reset!` for PyArray state types
"""
function Reinforce.reset!(env::GymEnv{T}) where T <: PyArray
setdata!(env.state, pycall!(env.pystate, env.pyreset, PyObject))
return gymreset!(env)
end

function Reinforce.step!(env::UniverseEnv, s, a)
info("Going to take action: $a")
pyact = Any[pyaction(a)]
s′, r, env.done, env.info = env.pyenv[:step](pyact)
env.reward, env.state = r, s′
"""
`reset!` for non-PyArray state types
"""
function Reinforce.reset!(env::GymEnv{T}) where T
pycall!(env.pystate, env.pyreset, PyObject)
env.state = convert(T, env.pystate)
return gymreset!(env)
end

Reinforce.finished(env::GymEnv, s′) = env.done
Reinforce.finished(env::UniverseEnv, s′) = all(env.done)
function gymreset!(env::GymEnv{T}) where T
env.reward = 0.0
env.total_reward = 0.0
env.actions = actions(env, nothing)
env.done = false
return env.state
end

"""
`step!` for PyArray state
"""
function Reinforce.step!(env::GymEnv{T}, a) where T <: PyArray
pyact = pyaction(a)
pycall!(env.pystepres, env.pystep, PyObject, pyact)

# --------------------------------------------------------------
unsafe_gettpl!(env.pystate, env.pystepres, PyObject, 0)
setdata!(env.state, env.pystate)

return gymstep!(env)
end

function test_env(name::String = "CartPole-v0")
env = gym(name)
for sars′ in Episode(env, RandomPolicy())
render(env)
end
"""
`step!` for non-PyArray state
"""
function Reinforce.step!(env::GymEnv{T}, a) where T
pyact = pyaction(a)
pycall!(env.pystepres, env.pystep, PyObject, pyact)

unsafe_gettpl!(env.pystate, env.pystepres, PyObject, 0)
env.state = convert(T, env.pystate)

return gymstep!(env)
end

@inline function gymstep!(env)
r = unsafe_gettpl!(env.pytplres, env.pystepres, Float64, 1)
env.done = unsafe_gettpl!(env.pytplres, env.pystepres, Bool, 2)
unsafe_gettpl!(env.info, env.pystepres, PyObject, 3)
env.total_reward += r
return (r, env.state)
end

Reinforce.finished(env::GymEnv) = env.done
Reinforce.finished(env::GymEnv, s′) = env.done

# --------------------------------------------------------------

function __init__()
@static if is_linux()
# due to a ssl library bug, I have to first load the ssl lib here
condadir = Pkg.dir("Conda","deps","usr","lib")
Libdl.dlopen(joinpath(condadir, "libssl.so"))
Libdl.dlopen(joinpath(condadir, "python2.7", "lib-dynload", "_ssl.so"))
end
global const pygym = PyNULL()
global const pysoccer = PyNULL()

global const pygym = pyimport("gym")
function __init__()
# the copy! puts the gym module into `pygym`, handling python ref-counting
Base.copy!(pygym, pyimport("gym"))
end

end # module
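For orientation, a minimal usage sketch of the wrapper as it stands after this change (the environment names and the random-action loop are illustrative; `reset!`, `step!` and `actions` come from Reinforce and are available after `using OpenAIGym`, as in the tests below):

using OpenAIGym

env = GymEnv("CartPole-v0")                    # observations are wrapped as a PyArray by default
# env = GymEnv("Blackjack-v0", stateT=PyAny)   # tuple-style observations convert more naturally

s = reset!(env)                                # returns the initial state
while !env.done
    a = rand(env.actions)                      # sample from the wrapped action_space
    r, s′ = step!(env, a)                      # step! no longer takes the current state
end
println("episode reward: ", env.total_reward)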
test/runtests.jl: 132 additions & 9 deletions (141 changes)
@@ -1,13 +1,136 @@
using OpenAIGym
using Base.Test
using OpenAIGym, Compat.Test
using PyCall

# write your own tests here
@test 1 == 1
"""
`function time_steps(env::GymEnv{T}, num_eps::Int) where T`

if isinteractive()
env = GymEnv("CartPole-v0")
for i=1:5
R = run_episode(()->nothing, env, RandomPolicy())
info("Episode $i finished. Total reward: $R")
run through num_eps eps, recording the time taken for each step and
how many steps were made. Doesn't time the `reset!` or the first step of each
episode (since higher chance that it's slower/faster than the rest, and we want
to compare the average time taken for each step as fairly as possible)
"""
function time_steps(env::GymEnv, num_eps::Int)
t = 0.0
steps = 0
for i in 1:num_eps
reset!(env)
# step!(env, rand(env.actions)) # ignore the first step - it might be slow?
t += (@elapsed steps += epstep(env))
end
steps, t
end

"""
Steps through an episode until it's `done`.
Assumes the env has already been `reset!`.
"""
function epstep(env::GymEnv)
steps = 0
while !env.done
steps += 1
r, s = step!(env, rand(env.actions))
end
steps
end

@testset "Gym Basics" begin

pong = GymEnv("Pong-v4")
pongnf = GymEnv("PongNoFrameskip-v4")
pacman = GymEnv("MsPacman-v4")
pacmannf = GymEnv("MsPacmanNoFrameskip-v4")
cartpole = GymEnv("CartPole-v0")
bj = GymEnv("Blackjack-v0", stateT=PyAny)

allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj]
eps2trial = Dict(pong=>2, pongnf=>1, pacman=>2, pacmannf=>1, cartpole=>400, bj=>30000)
atarienvs = [pong, pongnf, pacman, pacmannf]
envs = allenvs

@testset "envs load" begin
# check they all work - no errors == no worries
println("------------------------------ Check envs load ------------------------------")
for (i, env) in enumerate(envs)
a = rand(env.actions) |> OpenAIGym.pyaction
action_type = a |> PyObject |> pytypeof
println("env.pyenv: $(env.pyenv) action_type: $action_type (e.g. $a)")
time_steps(env, 1)
@test !ispynull(env.pyenv)
println("------------------------------")
end
end

@testset "julia speed test" begin
println("------------------------------ Begin Julia Speed Check ------------------------------")
for env in envs
num_eps = eps2trial[env]
steps, t = time_steps(env, num_eps)
println("env.pyenv: $(env.pyenv) num_eps: $num_eps t: $t steps: $steps")
println("microsecs/step (lower is better): ", t*1e6/steps)
println("------------------------------")
end
println("------------------------------ End Julia Speed Check ------------------------------\n")
end

@testset "python speed test" begin
println("------------------------------ Begin Python Speed Check ------------------------------")
py"""
import gym
import numpy as np

pong = gym.make("Pong-v4")
pongnf = gym.make("PongNoFrameskip-v4")
pacman = gym.make("MsPacman-v4");
pacmannf = gym.make("MsPacmanNoFrameskip-v4");
cartpole = gym.make("CartPole-v0")
bj = gym.make("Blackjack-v0")

allenvs = [pong, pongnf, pacman, pacmannf, cartpole, bj]
eps2trial = {pong: 2, pongnf: 1, pacman: 2, pacmannf: 1, cartpole: 400, bj: 30000}
atarienvs = [pong, pongnf, pacman, pacmannf];

envs = allenvs

import time
class Timer(object):
elapsed = 0.0
def __init__(self, name=None):
self.name = name

def __enter__(self):
self.tstart = time.time()

def __exit__(self, type, value, traceback):
Timer.elapsed = time.time() - self.tstart

def time_steps(env, num_eps):
t = 0.0
steps = 0
for i in range(num_eps):
env.reset()
with Timer():
steps += epstep(env)
t += Timer.elapsed
return steps, t

def epstep(env):
steps = 0
while True:
steps += 1
action = env.action_space.sample()
state, reward, done, info = env.step(action)
if done == True:
break
return steps

for env in envs:
num_eps = eps2trial[env]
with Timer():
steps, s = time_steps(env, num_eps)
t = Timer.elapsed
print(f"{env} num_eps: {num_eps} t: {t} steps: {steps} \n microsecs/step (lower is better): {t*1e6/steps}")
print("------------------------------")
"""
println("------------------------------ End Python Speed Check ------------------------------")
end
end
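A note on running the suite: the benchmarks above assume the Python `gym` package (including the Atari environments they use) is importable from the Python that PyCall is built against. With that in place, the whole file runs through the usual test entry point, e.g.

Pkg.test("OpenAIGym")   # runs test/runtests.jl, including both speed checks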