diff --git a/.gitignore b/.gitignore index 7d879e2..9f1b198 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,9 @@ binding-rust/target # Haskell binding binding-hs/.stack-work/* binding-hs/*.tags +binding-hs/.cabal-sandbox +binding-hs/cabal.sandbox.config +binding-hs/dist # Clojure binding .nrepl-port diff --git a/binding-hs/CHANGELOG.md b/binding-hs/CHANGELOG.md new file mode 100644 index 0000000..943ae5e --- /dev/null +++ b/binding-hs/CHANGELOG.md @@ -0,0 +1,166 @@ +# Version 0.1.0.2 + +[tag v0.1.0.2](https://github.com/stites/gym-http-api/releases/tag/v0.1.0.2) + +- Expose all environments listed at https://gym.openai.com/envs/ as of 07/17/2018. Current environments supported includes: + + Classic Control Environments: + + CartPoleV0 + + CartPoleV1 + + AcrobotV1 + + MountainCarV0 + + MountainCarContinuousV0 + + PendulumV0 + + Toy Text Environments + + BlackjackV0 + + FrozenLakeV0 + + FrozenLake8x8V0 + + GuessingGameV0 + + HotterColderV0 + + NChainV0 + + RouletteV0 + + TaxiV2 + + Box2D Environments: + + BipedalWalkerV2 + + BipedalWalkerHardcoreV2 + + CarRacingV0 + + LunarLanderV2 + + LunarLanderContinuousV2 + + Algorithm Environments: + + CopyV0 + + DuplicatedInputV0 + + RepeatCopyV0 + + ReverseV0 + + ReversedAdditionV0 + + ReversedAddition3V0 + + Atari Environments: + + AirRaidRamV0 + + AirRaidV0 + + AlienRamV0 + + AlienV0 + + AmidarRamV0 + + AmidarV0 + + AssaultRamV0 + + AssaultV0 + + AsterixRamV0 + + AsterixV0 + + AsteroidsRamV0 + + AsteroidsV0 + + AtlantisRamV0 + + AtlantisV0 + + BankHeistRamV0 + + BankHeistV0 + + BattleZoneRamV0 + + BattleZoneV0 + + BeamRiderRamV0 + + BeamRiderV0 + + BerzerkRamV0 + + BerzerkV0 + + BowlingRamV0 + + BowlingV0 + + BoxingRamV0 + + BoxingV0 + + BreakoutRamV0 + + BreakoutV0 + + CarnivalRamV0 + + CarnivalV0 + + CentipedeRamV0 + + CentipedeV0 + + ChopperCommandRamV0 + + ChopperCommandV0 + + CrazyClimberRamV0 + + CrazyClimberV0 + + DemonAttackRamV0 + + DemonAttackV0 + + DoubleDunkRamV0 + + DoubleDunkV0 + + ElevatorActionRamV0 + + 
ElevatorActionV0 + + EnduroRamV0 + + EnduroV0 + + FishingDerbyRamV0 + + FishingDerbyV0 + + FreewayRamV0 + + FreewayV0 + + FrostbiteRamV0 + + FrostbiteV0 + + GopherRamV0 + + GopherV0 + + GravitarRamV0 + + GravitarV0 + + IceHockeyRamV0 + + IceHockeyV0 + + JamesbondRamV0 + + JamesbondV0 + + JourneyEscapeRamV0 + + JourneyEscapeV0 + + KangarooRamV0 + + KangarooV0 + + KrullRamV0 + + KrullV0 + + KungFuMasterRamV0 + + KungFuMasterV0 + + MontezumaRevengeRamV0 + + MontezumaRevengeV0 + + MsPacmanRamV0 + + MsPacmanV0 + + NameThisGameRamV0 + + NameThisGameV0 + + PhoenixRamV0 + + PhoenixV0 + + PitfallRamV0 + + PitfallV0 + + PongRamV0 + + PongV0 + + PooyanRamV0 + + PooyanV0 + + PrivateEyeRamV0 + + PrivateEyeV0 + + QbertRamV0 + + QbertV0 + + RiverraidRamV0 + + RiverraidV0 + + RoadRunnerRamV0 + + RoadRunnerV0 + + RobotankRamV0 + + RobotankV0 + + SeaquestRamV0 + + SeaquestV0 + + SkiingRamV0 + + SkiingV0 + + SolarisRamV0 + + SolarisV0 + + SpaceInvadersRamV0 + + SpaceInvadersV0 + + StarGunnerRamV0 + + StarGunnerV0 + + TennisRamV0 + + TennisV0 + + TimePilotRamV0 + + TimePilotV0 + + TutankhamRamV0 + + TutankhamV0 + + UpNDownRamV0 + + UpNDownV0 + + VentureRamV0 + + VentureV0 + + VideoPinballRamV0 + + VideoPinballV0 + + WizardOfWorRamV0 + + WizardOfWorV0 + + YarsRevengeRamV0 + + YarsRevengeV0 + + ZaxxonRamV0 + + ZaxxonV0 + +# Version 0.1.0.1 + +[tag v0.1.0.1](https://github.com/stites/gym-http-api/releases/tag/v0.1.0.1) + +- Bump compatibility with lts-11 + +# Version 0.1.0.0 + +[tag v0.1.0.0](https://github.com/stites/gym-http-api/releases/tag/v0.1.0.0) + +- Initial release. API contains basic environments and Aeson-based functions. 
+ diff --git a/binding-hs/LICENSE b/binding-hs/LICENSE new file mode 120000 index 0000000..ea5b606 --- /dev/null +++ b/binding-hs/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/binding-hs/README.md b/binding-hs/README.md index 5068ec6..f1a3896 100644 --- a/binding-hs/README.md +++ b/binding-hs/README.md @@ -1,22 +1,15 @@ # Haskell Binding for the OpenAI gym open-source library -## Building +To run the example agent: + ``` -stack setup -stack build -stack exec example +stack build && stack exec example ``` -## Checklist -- [x] Implemented query functions -- [x] Added an example agent -- [ ] Added environment variable functionality to obtain the API key -- [ ] Optimization (lagging can be detected while running the example agent) -- [ ] Test suite +This library provides a servant-based REST client to the gym open-source library. +[openai/gym-http-api][openai] itself provides a [python-based REST server][flask] +to the gym open-source library, allowing development in languages other than python. 
+ +[openai]:https://github.com/openai/gym-http-api +[flask]:https://github.com/openai/gym-http-api/blob/master/gym_http_server.py -## Required HTTP Libraries -- aeson -- http-client -- servant -- servant-client -- servant-lucid diff --git a/binding-hs/TODO.md b/binding-hs/TODO.md new file mode 100644 index 0000000..a528b77 --- /dev/null +++ b/binding-hs/TODO.md @@ -0,0 +1,6 @@ +## Checklist +- [ ] Add environment variable functionality to obtain the API key +- [ ] Optimization (lagging can be detected while running the example agent) +- [ ] Test suite + + diff --git a/binding-hs/examples/Agent.hs b/binding-hs/examples/Agent.hs index f82acfd..6cd1722 100644 --- a/binding-hs/examples/Agent.hs +++ b/binding-hs/examples/Agent.hs @@ -8,22 +8,27 @@ -- Example of how to build an agent using OpenAI.Gym.Client ------------------------------------------------------------------------------- {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE CPP #-} module Main where -import Prelude -import Control.Monad (replicateM_, when) -import Control.Monad.Catch -import Control.Exception.Base +import Control.Exception.Base +import Control.Monad (replicateM_, when) +import Control.Monad.Catch +import Prelude -import OpenAI.Gym -import Servant.Client -import Network.HTTP.Client +import Network.HTTP.Client +import OpenAI.Gym +import Servant.Client main :: IO () main = do manager <- newManager defaultManagerSettings +#if MIN_VERSION_servant_client(0,13,0) + out <- runClientM example (ClientEnv manager url Nothing) +#else out <- runClientM example (ClientEnv manager url) +#endif case out of Left err -> print err Right ok -> print ok diff --git a/binding-hs/gym-http-api.cabal b/binding-hs/gym-http-api.cabal index 9456a1c..f70ad92 100644 --- a/binding-hs/gym-http-api.cabal +++ b/binding-hs/gym-http-api.cabal @@ -1,26 +1,33 @@ --- This file has been generated from package.yaml by hpack version 0.15.0. +-- This file has been generated from package.yaml by hpack version 0.21.2. 
-- -- see: https://github.com/sol/hpack +-- +-- hash: 085a2053bc4ebb1d1451b833911deb047b05b2a99658d37c37af73e0bda7d26f name: gym-http-api -version: 0.1.0.0 -description: Haskell binding for the OpenAi gym API -category: Web -homepage: https://github.com/openai/gym-http-api#readme -bug-reports: https://github.com/openai/gym-http-api/issues +version: 0.1.0.1 +synopsis: REST client to the gym-http-api project +description: This library provides a REST client to the gym open-source library. gym-http-api itself provides a server to the gym open-source library, allowing development in languages other than python. + . + Note that the is a monorepo of all language-clients. This hackage library tracks which is the actively-maintained haskell fork. +category: Web, Learning Environments +homepage: https://github.com/stites/gym-http-api#readme +bug-reports: https://github.com/stites/gym-http-api/issues author: Daniel Lucsanszky, Sam Stites maintainer: dl3913@ic.ac.uk, sam@stites.io license: MIT -license-file: ../LICENSE +license-file: LICENSE build-type: Simple cabal-version: >= 1.10 extra-source-files: + CHANGELOG.md README.md + TODO.md source-repository head type: git - location: https://github.com/openai/gym-http-api + location: https://github.com/stites/gym-http-api subdir: binding-hs library @@ -28,23 +35,19 @@ library src default-extensions: OverloadedStrings build-depends: - exceptions - , http-client - , servant-client + aeson >=1.0 , base >=4.7 && <5 - , aeson - , monad-loops - , unordered-containers - , servant - , servant-lucid - , text - , transformers + , servant >=0.9 + , servant-client >=0.9 + , servant-lucid >=0.7 + , text >=1.2 + , unordered-containers >=0.2 exposed-modules: OpenAI.Gym OpenAI.Gym.API OpenAI.Gym.Data other-modules: - OpenAI.Gym.Prelude + Paths_gym_http_api default-language: Haskell2010 executable example @@ -53,9 +56,11 @@ executable example examples default-extensions: OverloadedStrings build-depends: - exceptions - , http-client - , 
servant-client - , base >=4.7 && <5 + base >=4.7 && <5 + , exceptions >=0.8 , gym-http-api + , http-client >=0.5 + , servant-client >=0.9 + other-modules: + Paths_gym_http_api default-language: Haskell2010 diff --git a/binding-hs/package.yaml b/binding-hs/package.yaml index 19b5ae3..c7e8b77 100644 --- a/binding-hs/package.yaml +++ b/binding-hs/package.yaml @@ -1,54 +1,49 @@ name: gym-http-api -version: '0.1.0.0' -description: Haskell binding for the OpenAi gym API -category: Web +version: '0.1.0.1' +description: Haskell bindings for the OpenAI gym API +category: Web, Learning Environments author: Daniel Lucsanszky, Sam Stites maintainer: dl3913@ic.ac.uk, sam@stites.io license: MIT -license-file: ../LICENSE -github: openai/gym-http-api/binding-hs +license-file: LICENSE +github: stites/gym-http-api/binding-hs +synopsis: REST client to the gym-http-api project +description: + This library provides a REST client to the gym open-source library. gym-http-api itself provides a + server + to the gym open-source library, allowing development in languages other than python. + + + Note that the is a monorepo of all + language-clients. This hackage library tracks + which is the actively-maintained haskell fork. 
extra-source-files: -- README.md + - README.md + - TODO.md + - CHANGELOG.md default-extensions: - OverloadedStrings dependencies: - - exceptions - - http-client - - servant-client + - base >= 4.7 && < 5 + - servant-client >= 0.9 library: source-dirs: src - other-modules: - - OpenAI.Gym.Prelude - dependencies: - - base >=4.7 && <5 - - aeson - - monad-loops - - unordered-containers - - servant - - servant-lucid - - text - - transformers + - aeson >= 1.0 + - unordered-containers >= 0.2 + - servant >= 0.9 + - servant-lucid >= 0.7 + - text >= 1.2 executables: example: main: Agent.hs source-dirs: examples dependencies: - - base >=4.7 && <5 - gym-http-api + - http-client >= 0.5 + - exceptions >= 0.8 -# tests: - # binding-hs-test: - # main: Spec.hs - # source-dirs: test - # ghc-options: - # - -threaded - # - -rtsopts - # - -with-rtsopts=-K1K - # dependencies: - # - base - # - binding-hs diff --git a/binding-hs/src/OpenAI/Gym.hs b/binding-hs/src/OpenAI/Gym.hs index c7a2ef3..31a020e 100644 --- a/binding-hs/src/OpenAI/Gym.hs +++ b/binding-hs/src/OpenAI/Gym.hs @@ -1,8 +1,17 @@ --- * reexports +------------------------------------------------------------------------------- +-- | +-- Module : OpenAI.Gym +-- License : MIT +-- Stability : experimental +-- Portability: non-portable +-- +-- re-exports of @OpenAI.Gym.API@ and @OpenAI.Gym.Data@ +------------------------------------------------------------------------------- module OpenAI.Gym - ( module X + ( module OpenAI.Gym.API + , module OpenAI.Gym.Data ) where -import OpenAI.Gym.API as X -import OpenAI.Gym.Data as X +import OpenAI.Gym.API +import OpenAI.Gym.Data diff --git a/binding-hs/src/OpenAI/Gym/API.hs b/binding-hs/src/OpenAI/Gym/API.hs index 03e07b8..1c7d820 100644 --- a/binding-hs/src/OpenAI/Gym/API.hs +++ b/binding-hs/src/OpenAI/Gym/API.hs @@ -4,12 +4,40 @@ -- License : MIT -- Stability : experimental -- Portability: non-portable +-- +-- Servant-client functions to interact with the flask server from +-- . 
------------------------------------------------------------------------------- {-# LANGUAGE DataKinds #-} +{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TypeOperators #-} -module OpenAI.Gym.API where +{-# OPTIONS_GHC -Wno-orphans #-} --for MimeUnrender HTML () +module OpenAI.Gym.API ( + -- * Environment functions + envCreate + , envListAll + , envReset + , envStep + , envActionSpaceInfo + , envActionSpaceSample + , envActionSpaceContains + , envObservationSpaceInfo + , envMonitorStart + , envMonitorClose + , envClose + -- * Http-server commands + , upload + , shutdownServer + -- * Servant code + , gymAPI + ) where + +import Data.Aeson (Object) +import Data.Proxy (Proxy(..)) +import Servant.API +import Servant.HTML.Lucid (HTML) +import Servant.Client (ClientM, client) -import OpenAI.Gym.Prelude import OpenAI.Gym.Data type GymAPI @@ -28,23 +56,49 @@ type GymAPI :<|> "shutdown" :> Post '[HTML] ()) +-- | Proxy type for the full servant-client representation of the gym http api. gymAPI :: Proxy GymAPI gymAPI = Proxy -envCreate :: GymEnv -> ClientM InstID -envListAll :: ClientM Environment -envReset :: InstID -> ClientM Observation -envStep :: InstID -> Step -> ClientM Outcome -envActionSpaceInfo :: InstID -> ClientM Info -envActionSpaceSample :: InstID -> ClientM Action -envActionSpaceContains :: InstID -> Int -> ClientM Object -envObservationSpaceInfo :: InstID -> ClientM Info -envMonitorStart :: InstID -> Monitor -> ClientM () -envMonitorClose :: InstID -> ClientM () -envClose :: InstID -> ClientM () -upload :: Config -> ClientM () -shutdownServer :: ClientM () +-- | Create an instance of the specified environment (@POST \/v1\/envs\/@) +envCreate :: GymEnv -> ClientM InstID + +-- | List all environments running on the server (@GET \/v1\/envs\/@) +envListAll :: ClientM Environment + +-- | Reset the state of the environment and return an initial observation. 
(@POST \/v1\/envs\/\/reset\/@) +envReset :: InstID -> ClientM Observation + +-- | Step through an environment using an action. (@POST \/v1\/envs\/\/step\/@) +envStep :: InstID -> Step -> ClientM Outcome + +-- | Get information (name and dimensions\/bounds) of the env's action_space (@GET \/v1\/envs\/\/action_space\/@) +envActionSpaceInfo :: InstID -> ClientM Info + +-- | Sample randomly from the env's action_space (@GET \/v1\/envs\/\/action_space\/sample@) +envActionSpaceSample :: InstID -> ClientM Action + +-- | Check to see if a value is valid in the env's action_space (@GET \/v1\/envs\/\/action_space\/contains\/@) +envActionSpaceContains :: InstID -> Int -> ClientM Object + +-- | Get information (name and dimensions\/bounds) of the env's observation_space (@GET \/v1\/envs\/\/observation_space\/@) +envObservationSpaceInfo :: InstID -> ClientM Info + +-- | Start monitoring (@POST \/v1\/envs\/\/monitor\/start\/@) +envMonitorStart :: InstID -> Monitor -> ClientM () + +-- | Flush all monitor data to disk (@POST \/v1\/envs\/\/monitor\/close\/@) +envMonitorClose :: InstID -> ClientM () + +-- | Stop the environment (@POST \/v1\/envs\/\/close\/@) +envClose :: InstID -> ClientM () + +-- | Upload results to OpenAI's servers (@POST \/v1\/upload\/@) +upload :: Config -> ClientM () + +-- | Request a server shutdown (@POST \/v1\/shutdown\/@) +shutdownServer :: ClientM () (envCreate @@ -62,4 +116,5 @@ shutdownServer :: ClientM () :<|> shutdownServer = client gymAPI - +instance MimeUnrender HTML () where + mimeUnrender _ _ = return () diff --git a/binding-hs/src/OpenAI/Gym/Data.hs b/binding-hs/src/OpenAI/Gym/Data.hs index 76b1b27..e999d86 100644 --- a/binding-hs/src/OpenAI/Gym/Data.hs +++ b/binding-hs/src/OpenAI/Gym/Data.hs @@ -4,6 +4,8 @@ -- License : MIT -- Stability : experimental -- Portability: non-portable +-- +-- Aeson-based data types to be returned by "OpenAI.Gym.API" ------------------------------------------------------------------------------- {-# LANGUAGE 
DeriveGeneric #-} module OpenAI.Gym.Data @@ -19,12 +21,18 @@ module OpenAI.Gym.Data , Config (..) ) where -import OpenAI.Gym.Prelude +import Data.Aeson (ToJSON(..), FromJSON(..), Value(..), Object, (.=), (.:), object) +import Data.Aeson.Types (Parser) +import Data.HashMap.Strict (HashMap) +import Data.Text (Text) +import GHC.Generics (Generic) +import Servant.API (ToHttpApiData(..)) import qualified Data.Text as T import qualified Data.Aeson as A +-- | Game Environments data GymEnv - -- | Classic Control Environments + -- Classic Control Environments = CartPoleV0 -- ^ Balance a pole on a cart (for a short time). | CartPoleV1 -- ^ Balance a pole on a cart. | AcrobotV1 -- ^ Swing up a two-link robot. @@ -33,30 +41,307 @@ data GymEnv | PendulumV0 -- ^ Swing up a pendulum. -- Toy text games - | FrozenLakeV0 -- ^ Swing up a pendulum. - - -- | Atari Games - | PongRamV0 -- ^ Maximize score in the game Pong, with RAM as input - | PongV0 -- ^ Maximize score in the game Pong + | BlackjackV0 -- ^ Play Blackjack against a computer dealer + | FrozenLakeV0 -- ^ Find a safe path across a grid of ice and water tiles. + | FrozenLake8x8V0 -- ^ Find a safe path across a grid of ice and water tiles. + | GuessingGameV0 -- ^ Guess close to randomly selected number + | HotterColderV0 -- ^ Guess close to a random selected number using hints + | NChainV0 -- ^ Traverse a linear chain of states + | RouletteV0 -- ^ Learn a winning strategy for playing roulette. + | TaxiV2 -- ^ As a taxi driver, you need to pick up and drop off passengers as fast as possible. + + -- Box2D + | BipedalWalkerV2 -- ^ Train a bipedal robot to walk. + | BipedalWalkerHardcoreV2 -- ^ Train a bipedal robot to walk over rough terrain. + | CarRacingV0 -- ^ Race a car around a track. + | LunarLanderV2 -- ^ Navigate a lander to its landing pad. + | LunarLanderContinuousV2 -- ^ Navigate a lander to its landing pad. + + -- Algorithms + | CopyV0 -- ^ Copy symbols from the input tape. 
+ | DuplicatedInputV0 -- ^ Copy and deduplicate data from the input tape. + | RepeatCopyV0 -- ^ Copy symbols from the input tape multiple times. + | ReverseV0 -- ^ Reverse the symbols on the input tape. + | ReversedAdditionV0 -- ^ Learn to add multi-digit numbers. + | ReversedAddition3V0 -- ^ Learn to add three multi-digit numbers. + + -- Atari Games + | AirRaidRamV0 -- ^ Maximize score in the game AirRaid, with RAM as input + | AirRaidV0 -- ^ Maximize score in the game AirRaid, with screen images as input + | AlienRamV0 -- ^ Maximize score in the game Alien, with RAM as input + | AlienV0 -- ^ Maximize score in the game Alien, with screen images as input + | AmidarRamV0 -- ^ Maximize score in the game Amidar, with RAM as input + | AmidarV0 -- ^ Maximize score in the game Amidar, with screen images as input + | AssaultRamV0 -- ^ Maximize score in the game Assault, with RAM as input + | AssaultV0 -- ^ Maximize score in the game Assault, with screen images as input + | AsterixRamV0 -- ^ Maximize score in the game Asterix, with RAM as input + | AsterixV0 -- ^ Maximize score in the game Asterix, with screen images as input + | AsteroidsRamV0 -- ^ Maximize score in the game Asteroids, with RAM as input + | AsteroidsV0 -- ^ Maximize score in the game Asteroids, with screen images as input + | AtlantisRamV0 -- ^ Maximize score in the game Atlantis, with RAM as input + | AtlantisV0 -- ^ Maximize score in the game Atlantis, with screen images as input + | BankHeistRamV0 -- ^ Maximize score in the game BankHeist, with RAM as input + | BankHeistV0 -- ^ Maximize score in the game BankHeist, with screen images as input + | BattleZoneRamV0 -- ^ Maximize score in the game BattleZone, with RAM as input + | BattleZoneV0 -- ^ Maximize score in the game BattleZone, with screen images as input + | BeamRiderRamV0 -- ^ Maximize score in the game BeamRider, with RAM as input + | BeamRiderV0 -- ^ Maximize score in the game BeamRider, with screen images as input + | BerzerkRamV0 -- ^ 
Maximize score in the game Berzerk, with RAM as input + | BerzerkV0 -- ^ Maximize score in the game Berzerk, with screen images as input + | BowlingRamV0 -- ^ Maximize score in the game Bowling, with RAM as input + | BowlingV0 -- ^ Maximize score in the game Bowling, with screen images as input + | BoxingRamV0 -- ^ Maximize score in the game Boxing, with RAM as input + | BoxingV0 -- ^ Maximize score in the game Boxing, with screen images as input + | BreakoutRamV0 -- ^ Maximize score in the game Breakout, with RAM as input + | BreakoutV0 -- ^ Maximize score in the game Breakout, with screen images as input + | CarnivalRamV0 -- ^ Maximize score in the game Carnival, with RAM as input + | CarnivalV0 -- ^ Maximize score in the game Carnival, with screen images as input + | CentipedeRamV0 -- ^ Maximize score in the game Centipede, with RAM as input + | CentipedeV0 -- ^ Maximize score in the game Centipede, with screen images as input + | ChopperCommandRamV0 -- ^ Maximize score in the game ChopperCommand, with RAM as input + | ChopperCommandV0 -- ^ Maximize score in the game ChopperCommand, with screen images as input + | CrazyClimberRamV0 -- ^ Maximize score in the game CrazyClimber, with RAM as input + | CrazyClimberV0 -- ^ Maximize score in the game CrazyClimber, with screen images as input + | DemonAttackRamV0 -- ^ Maximize score in the game DemonAttack, with RAM as input + | DemonAttackV0 -- ^ Maximize score in the game DemonAttack, with screen images as input + | DoubleDunkRamV0 -- ^ Maximize score in the game DoubleDunk, with RAM as input + | DoubleDunkV0 -- ^ Maximize score in the game DoubleDunk, with screen images as input + | ElevatorActionRamV0 -- ^ Maximize score in the game ElevatorAction, with RAM as input + | ElevatorActionV0 -- ^ Maximize score in the game ElevatorAction, with screen images as input + | EnduroRamV0 -- ^ Maximize score in the game Enduro, with RAM as input + | EnduroV0 -- ^ Maximize score in the game Enduro, with screen images as input + 
| FishingDerbyRamV0 -- ^ Maximize score in the game FishingDerby, with RAM as input + | FishingDerbyV0 -- ^ Maximize score in the game FishingDerby, with screen images as input + | FreewayRamV0 -- ^ Maximize score in the game Freeway, with RAM as input + | FreewayV0 -- ^ Maximize score in the game Freeway, with screen images as input + | FrostbiteRamV0 -- ^ Maximize score in the game Frostbite, with RAM as input + | FrostbiteV0 -- ^ Maximize score in the game Frostbite, with screen images as input + | GopherRamV0 -- ^ Maximize score in the game Gopher, with RAM as input + | GopherV0 -- ^ Maximize score in the game Gopher, with screen images as input + | GravitarRamV0 -- ^ Maximize score in the game Gravitar, with RAM as input + | GravitarV0 -- ^ Maximize score in the game Gravitar, with screen images as input + | IceHockeyRamV0 -- ^ Maximize score in the game IceHockey, with RAM as input + | IceHockeyV0 -- ^ Maximize score in the game IceHockey, with screen images as input + | JamesbondRamV0 -- ^ Maximize score in the game Jamesbond, with RAM as input + | JamesbondV0 -- ^ Maximize score in the game Jamesbond, with screen images as input + | JourneyEscapeRamV0 -- ^ Maximize score in the game JourneyEscape, with RAM as input + | JourneyEscapeV0 -- ^ Maximize score in the game JourneyEscape, with screen images as input + | KangarooRamV0 -- ^ Maximize score in the game Kangaroo, with RAM as input + | KangarooV0 -- ^ Maximize score in the game Kangaroo, with screen images as input + | KrullRamV0 -- ^ Maximize score in the game Krull, with RAM as input + | KrullV0 -- ^ Maximize score in the game Krull, with screen images as input + | KungFuMasterRamV0 -- ^ Maximize score in the game KungFuMaster, with RAM as input + | KungFuMasterV0 -- ^ Maximize score in the game KungFuMaster, with screen images as input + | MontezumaRevengeRamV0 -- ^ Maximize score in the game MontezumaRevenge, with RAM as input + | MontezumaRevengeV0 -- ^ Maximize score in the game MontezumaRevenge, 
with screen images as input + | MsPacmanRamV0 -- ^ Maximize score in the game MsPacman, with RAM as input + | MsPacmanV0 -- ^ Maximize score in the game MsPacman, with screen images as input + | NameThisGameRamV0 -- ^ Maximize score in the game NameThisGame, with RAM as input + | NameThisGameV0 -- ^ Maximize score in the game NameThisGame, with screen images as input + | PhoenixRamV0 -- ^ Maximize score in the game Phoenix, with RAM as input + | PhoenixV0 -- ^ Maximize score in the game Phoenix, with screen images as input + | PitfallRamV0 -- ^ Maximize score in the game Pitfall, with RAM as input + | PitfallV0 -- ^ Maximize score in the game Pitfall, with screen images as input + | PongRamV0 -- ^ Maximize score in the game Pong, with RAM as input + | PongV0 -- ^ Maximize score in the game Pong, with screen images as input + | PooyanRamV0 -- ^ Maximize score in the game Pooyan, with RAM as input + | PooyanV0 -- ^ Maximize score in the game Pooyan, with screen images as input + | PrivateEyeRamV0 -- ^ Maximize score in the game PrivateEye, with RAM as input + | PrivateEyeV0 -- ^ Maximize score in the game PrivateEye, with screen images as input + | QbertRamV0 -- ^ Maximize score in the game Qbert, with RAM as input + | QbertV0 -- ^ Maximize score in the game Qbert, with screen images as input + | RiverraidRamV0 -- ^ Maximize score in the game Riverraid, with RAM as input + | RiverraidV0 -- ^ Maximize score in the game Riverraid, with screen images as input + | RoadRunnerRamV0 -- ^ Maximize score in the game RoadRunner, with RAM as input + | RoadRunnerV0 -- ^ Maximize score in the game RoadRunner, with screen images as input + | RobotankRamV0 -- ^ Maximize score in the game Robotank, with RAM as input + | RobotankV0 -- ^ Maximize score in the game Robotank, with screen images as input + | SeaquestRamV0 -- ^ Maximize score in the game Seaquest, with RAM as input + | SeaquestV0 -- ^ Maximize score in the game Seaquest, with screen images as input + | SkiingRamV0 -- ^ 
Maximize score in the game Skiing, with RAM as input + | SkiingV0 -- ^ Maximize score in the game Skiing, with screen images as input + | SolarisRamV0 -- ^ Maximize score in the game Solaris, with RAM as input + | SolarisV0 -- ^ Maximize score in the game Solaris, with screen images as input + | SpaceInvadersRamV0 -- ^ Maximize score in the game SpaceInvaders, with RAM as input + | SpaceInvadersV0 -- ^ Maximize score in the game SpaceInvaders, with screen images as input + | StarGunnerRamV0 -- ^ Maximize score in the game StarGunner, with RAM as input + | StarGunnerV0 -- ^ Maximize score in the game StarGunner, with screen images as input + | TennisRamV0 -- ^ Maximize score in the game Tennis, with RAM as input + | TennisV0 -- ^ Maximize score in the game Tennis, with screen images as input + | TimePilotRamV0 -- ^ Maximize score in the game TimePilot, with RAM as input + | TimePilotV0 -- ^ Maximize score in the game TimePilot, with screen images as input + | TutankhamRamV0 -- ^ Maximize score in the game Tutankham, with RAM as input + | TutankhamV0 -- ^ Maximize score in the game Tutankham, with screen images as input + | UpNDownRamV0 -- ^ Maximize score in the game UpNDown, with RAM as input + | UpNDownV0 -- ^ Maximize score in the game UpNDown, with screen images as input + | VentureRamV0 -- ^ Maximize score in the game Venture, with RAM as input + | VentureV0 -- ^ Maximize score in the game Venture, with screen images as input + | VideoPinballRamV0 -- ^ Maximize score in the game VideoPinball, with RAM as input + | VideoPinballV0 -- ^ Maximize score in the game VideoPinball, with screen images as input + | WizardOfWorRamV0 -- ^ Maximize score in the game WizardOfWor, with RAM as input + | WizardOfWorV0 -- ^ Maximize score in the game WizardOfWor, with screen images as input + | YarsRevengeRamV0 -- ^ Maximize score in the game YarsRevenge, with RAM as input + | YarsRevengeV0 -- ^ Maximize score in the game YarsRevenge, with screen images as input + | ZaxxonRamV0 
-- ^ Maximize score in the game Zaxxon, with RAM as input + | ZaxxonV0 -- ^ Maximize score in the game Zaxxon, with screen images as input deriving (Eq, Enum, Ord) - instance Show GymEnv where - show CartPoleV0 = "CartPole-v0" - show CartPoleV1 = "CartPole-v1" - show AcrobotV1 = "Acrobot-v1" - show MountainCarV0 = "MountainCar-v0" - show MountainCarContinuousV0 = "MountainCarContinuous-v0" - show PendulumV0 = "Pendulum-v0" - show FrozenLakeV0 = "FrozenLake-v0" - show PongRamV0 = "Pong-ram-v0" - show PongV0 = "Pong-v0" + show = \case + CartPoleV0 -> "CartPole-v0" + CartPoleV1 -> "CartPole-v1" + AcrobotV1 -> "Acrobot-v1" + MountainCarV0 -> "MountainCar-v0" + MountainCarContinuousV0 -> "MountainCarContinuous-v0" + PendulumV0 -> "Pendulum-v0" + + BlackjackV0 -> "Blackjack-v0" + FrozenLakeV0 -> "FrozenLake-v0" + FrozenLake8x8V0 -> "FrozenLake8x8-v0" + GuessingGameV0 -> "GuessingGame-v0" + HotterColderV0 -> "HotterColder-v0" + NChainV0 -> "NChain-v0" + RouletteV0 -> "Roulette-v0" + TaxiV2 -> "Taxi-v2" + + BipedalWalkerV2 -> "BipedalWalker-v2" + BipedalWalkerHardcoreV2 -> "BipedalWalkerHardcore-v2" + CarRacingV0 -> "CarRacing-v0" + LunarLanderV2 -> "LunarLander-v2" + LunarLanderContinuousV2 -> "LunarLanderContinuous-v2" + + CopyV0 -> "Copy-v0" + DuplicatedInputV0 -> "DuplicatedInput-v0" + RepeatCopyV0 -> "RepeatCopy-v0" + ReverseV0 -> "Reverse-v0" + ReversedAdditionV0 -> "ReversedAddition-v0" + ReversedAddition3V0 -> "ReversedAddition3-v0" + + AirRaidRamV0 -> "AirRaid-ram-v0" + AirRaidV0 -> "AirRaid-v0" + AlienRamV0 -> "Alien-ram-v0" + AlienV0 -> "Alien-v0" + AmidarRamV0 -> "Amidar-ram-v0" + AmidarV0 -> "Amidar-v0" + AssaultRamV0 -> "Assault-ram-v0" + AssaultV0 -> "Assault-v0" + AsterixRamV0 -> "Asterix-ram-v0" + AsterixV0 -> "Asterix-v0" + AsteroidsRamV0 -> "Asteroids-ram-v0" + AsteroidsV0 -> "Asteroids-v0" + AtlantisRamV0 -> "Atlantis-ram-v0" + AtlantisV0 -> "Atlantis-v0" + BankHeistRamV0 -> "BankHeist-ram-v0" + BankHeistV0 -> "BankHeist-v0" + BattleZoneRamV0 -> 
"BattleZone-ram-v0" + BattleZoneV0 -> "BattleZone-v0" + BeamRiderRamV0 -> "BeamRider-ram-v0" + BeamRiderV0 -> "BeamRider-v0" + BerzerkRamV0 -> "Berzerk-ram-v0" + BerzerkV0 -> "Berzerk-v0" + BowlingRamV0 -> "Bowling-ram-v0" + BowlingV0 -> "Bowling-v0" + BoxingRamV0 -> "Boxing-ram-v0" + BoxingV0 -> "Boxing-v0" + BreakoutRamV0 -> "Breakout-ram-v0" + BreakoutV0 -> "Breakout-v0" + CarnivalRamV0 -> "Carnival-ram-v0" + CarnivalV0 -> "Carnival-v0" + CentipedeRamV0 -> "Centipede-ram-v0" + CentipedeV0 -> "Centipede-v0" + ChopperCommandRamV0 -> "ChopperCommand-ram-v0" + ChopperCommandV0 -> "ChopperCommand-v0" + CrazyClimberRamV0 -> "CrazyClimber-ram-v0" + CrazyClimberV0 -> "CrazyClimber-v0" + DemonAttackRamV0 -> "DemonAttack-ram-v0" + DemonAttackV0 -> "DemonAttack-v0" + DoubleDunkRamV0 -> "DoubleDunk-ram-v0" + DoubleDunkV0 -> "DoubleDunk-v0" + ElevatorActionRamV0 -> "ElevatorAction-ram-v0" + ElevatorActionV0 -> "ElevatorAction-v0" + EnduroRamV0 -> "Enduro-ram-v0" + EnduroV0 -> "Enduro-v0" + FishingDerbyRamV0 -> "FishingDerby-ram-v0" + FishingDerbyV0 -> "FishingDerby-v0" + FreewayRamV0 -> "Freeway-ram-v0" + FreewayV0 -> "Freeway-v0" + FrostbiteRamV0 -> "Frostbite-ram-v0" + FrostbiteV0 -> "Frostbite-v0" + GopherRamV0 -> "Gopher-ram-v0" + GopherV0 -> "Gopher-v0" + GravitarRamV0 -> "Gravitar-ram-v0" + GravitarV0 -> "Gravitar-v0" + IceHockeyRamV0 -> "IceHockey-ram-v0" + IceHockeyV0 -> "IceHockey-v0" + JamesbondRamV0 -> "Jamesbond-ram-v0" + JamesbondV0 -> "Jamesbond-v0" + JourneyEscapeRamV0 -> "JourneyEscape-ram-v0" + JourneyEscapeV0 -> "JourneyEscape-v0" + KangarooRamV0 -> "Kangaroo-ram-v0" + KangarooV0 -> "Kangaroo-v0" + KrullRamV0 -> "Krull-ram-v0" + KrullV0 -> "Krull-v0" + KungFuMasterRamV0 -> "KungFuMaster-ram-v0" + KungFuMasterV0 -> "KungFuMaster-v0" + MontezumaRevengeRamV0 -> "MontezumaRevenge-ram-v0" + MontezumaRevengeV0 -> "MontezumaRevenge-v0" + MsPacmanRamV0 -> "MsPacman-ram-v0" + MsPacmanV0 -> "MsPacman-v0" + NameThisGameRamV0 -> "NameThisGame-ram-v0" + NameThisGameV0 
-> "NameThisGame-v0" + PhoenixRamV0 -> "Phoenix-ram-v0" + PhoenixV0 -> "Phoenix-v0" + PitfallRamV0 -> "Pitfall-ram-v0" + PitfallV0 -> "Pitfall-v0" + PongRamV0 -> "Pong-ram-v0" + PongV0 -> "Pong-v0" + PooyanRamV0 -> "Pooyan-ram-v0" + PooyanV0 -> "Pooyan-v0" + PrivateEyeRamV0 -> "PrivateEye-ram-v0" + PrivateEyeV0 -> "PrivateEye-v0" + QbertRamV0 -> "Qbert-ram-v0" + QbertV0 -> "Qbert-v0" + RiverraidRamV0 -> "Riverraid-ram-v0" + RiverraidV0 -> "Riverraid-v0" + RoadRunnerRamV0 -> "RoadRunner-ram-v0" + RoadRunnerV0 -> "RoadRunner-v0" + RobotankRamV0 -> "Robotank-ram-v0" + RobotankV0 -> "Robotank-v0" + SeaquestRamV0 -> "Seaquest-ram-v0" + SeaquestV0 -> "Seaquest-v0" + SkiingRamV0 -> "Skiing-ram-v0" + SkiingV0 -> "Skiing-v0" + SolarisRamV0 -> "Solaris-ram-v0" + SolarisV0 -> "Solaris-v0" + SpaceInvadersRamV0 -> "SpaceInvaders-ram-v0" + SpaceInvadersV0 -> "SpaceInvaders-v0" + StarGunnerRamV0 -> "StarGunner-ram-v0" + StarGunnerV0 -> "StarGunner-v0" + TennisRamV0 -> "Tennis-ram-v0" + TennisV0 -> "Tennis-v0" + TimePilotRamV0 -> "TimePilot-ram-v0" + TimePilotV0 -> "TimePilot-v0" + TutankhamRamV0 -> "Tutankham-ram-v0" + TutankhamV0 -> "Tutankham-v0" + UpNDownRamV0 -> "UpNDown-ram-v0" + UpNDownV0 -> "UpNDown-v0" + VentureRamV0 -> "Venture-ram-v0" + VentureV0 -> "Venture-v0" + VideoPinballRamV0 -> "VideoPinball-ram-v0" + VideoPinballV0 -> "VideoPinball-v0" + WizardOfWorRamV0 -> "WizardOfWor-ram-v0" + WizardOfWorV0 -> "WizardOfWor-v0" + YarsRevengeRamV0 -> "YarsRevenge-ram-v0" + YarsRevengeV0 -> "YarsRevenge-v0" + ZaxxonRamV0 -> "Zaxxon-ram-v0" + ZaxxonV0 -> "Zaxxon-v0" instance ToJSON GymEnv where toJSON env = object [ "env_id" .= show env ] - -data InstID = InstID !Text +-- | a short identifier (such as '3c657dbc') for the created environment instance. +-- The instance_id is used in future API calls to identify the environment to be manipulated. 
+newtype InstID = InstID { getInstID :: Text } deriving (Eq, Show, Generic) instance ToHttpApiData InstID where @@ -68,15 +353,15 @@ instance ToJSON InstID where instance FromJSON InstID where parseJSON = parseSingleton InstID "instance_id" - +-- | a mapping of instance_id to env_id (e.g. {'3c657dbc': 'CartPole-v0'}) for every env on the server newtype Environment = Environment { all_envs :: HashMap Text Text } deriving (Eq, Show, Generic) instance ToJSON Environment instance FromJSON Environment - -data Observation = Observation !Value +-- | The agent's observation of the current environment +newtype Observation = Observation { getObservation :: Value } deriving (Eq, Show, Generic) instance ToJSON Observation where @@ -85,7 +370,7 @@ instance ToJSON Observation where instance FromJSON Observation where parseJSON = parseSingleton Observation "observation" - +-- | An action to take in the environment and whether or not to render that change data Step = Step { action :: !Value , render :: !Bool @@ -93,19 +378,19 @@ data Step = Step instance ToJSON Step - +-- | The result of taking a step in an environment data Outcome = Outcome - { observation :: !Value - , reward :: !Double - , done :: !Bool - , info :: !Object + { observation :: !Value -- ^ agent's observation of the current environment + , reward :: !Double -- ^ amount of reward returned after previous action + , done :: !Bool -- ^ whether the episode has ended + , info :: !Object -- ^ a dict containing auxiliary diagnostic information } deriving (Eq, Show, Generic) instance ToJSON Outcome instance FromJSON Outcome - -data Info = Info !Object +-- | A dict containing auxiliary diagnostic information +newtype Info = Info { getInfo :: Object } deriving (Eq, Show, Generic) instance ToJSON Info where @@ -114,8 +399,8 @@ instance ToJSON Info where instance FromJSON Info where parseJSON = parseSingleton Info "info" - -data Action = Action !Value +-- | An action to take in the environment +newtype Action = Action { 
getAction :: Value } deriving (Eq, Show, Generic) instance ToJSON Action where @@ -124,23 +409,36 @@ instance ToJSON Action where instance FromJSON Action where parseJSON = parseSingleton Action "action" - +-- | Parameters used to start a monitoring session. data Monitor = Monitor - { directory :: !Text - , force :: !Bool - , resume :: !Bool - , video_callable :: !Bool + { directory :: !Text -- ^ directory to use for monitoring + , force :: !Bool -- ^ Clear out existing training data from this directory (by deleting + -- every file prefixed with "openaigym.") (default=False) + , resume :: !Bool -- ^ Retain the training data already in this directory, which will be + -- merged with our new data. (default=False) + , video_callable :: !Bool -- ^ video_callable parameter from the native env.monitor.start function } deriving (Generic, Eq, Show) instance ToJSON Monitor - +-- | Parameters used to upload a monitored session to OpenAI's servers data Config = Config - { training_dir :: !Text - , algorithm_id :: !Text - , api_key :: !Text + { training_dir :: !Text -- ^ A directory containing the results of a training run. + , algorithm_id :: !Text -- ^ An arbitrary string indicating the particular version of the + -- algorithm (including choices of parameters) you are running. 
+ -- (default=None) + , api_key :: !Text -- ^ Your OpenAI API key } deriving (Generic, Eq, Show) instance ToJSON Config +-- | helper to parse a singleton object from aeson +parseSingleton :: FromJSON a => (a -> b) -> Text -> Value -> Parser b +parseSingleton fn f (Object v) = fn <$> v .: f +parseSingleton fn f _ = mempty + +-- | convert a value into a singleton object +toSingleton :: ToJSON a => Text -> a -> Value +toSingleton f a = object [ f .= toJSON a ] + diff --git a/binding-hs/src/OpenAI/Gym/Prelude.hs b/binding-hs/src/OpenAI/Gym/Prelude.hs deleted file mode 100644 index aaf6242..0000000 --- a/binding-hs/src/OpenAI/Gym/Prelude.hs +++ /dev/null @@ -1,43 +0,0 @@ -------------------------------------------------------------------------------- --- | --- Module : OpenAI.Gym.Client --- License : MIT --- Stability : experimental --- Portability: non-portable -------------------------------------------------------------------------------- -{-# LANGUAGE MultiParamTypeClasses #-} -{-# OPTIONS_GHC -Wno-orphans #-} -module OpenAI.Gym.Prelude - ( module P - , parseSingleton - , toSingleton - ) where - -import Control.Monad as P -import Control.Monad.Loops as P -import Control.Monad.Trans.Except as P (runExceptT) -import Control.Monad.Trans.Reader as P -import Control.Monad.Trans.Class as P -import Data.Aeson as P -import Data.HashMap.Strict as P (HashMap) -import Data.Proxy as P -import Data.Text as P (Text) -import GHC.Generics as P -import Network.HTTP.Client as P hiding (Proxy, responseBody, responseStatus) -import Servant.API as P -import Servant.Client as P -import Servant.HTML.Lucid as P (HTML) -import Control.Monad.IO.Class as P -import Prelude as P -import Data.Aeson.Types (Parser) - -instance MimeUnrender HTML () where - mimeUnrender _ _ = return () - -parseSingleton :: FromJSON a => (a -> b) -> Text -> Value -> Parser b -parseSingleton fn f (Object v) = fn <$> v .: f -parseSingleton fn f _ = mempty - -toSingleton :: ToJSON a => Text -> a -> Value -toSingleton f 
a = object [ f .= toJSON a ] - diff --git a/binding-hs/stack-lts-7.19.yaml b/binding-hs/stack-lts-7.19.yaml deleted file mode 100644 index f51bdc9..0000000 --- a/binding-hs/stack-lts-7.19.yaml +++ /dev/null @@ -1,6 +0,0 @@ -resolver: lts-8.4 -packages: -- '.' -extra-deps: [] -flags: {} -extra-package-dbs: [] diff --git a/binding-hs/stack.yaml b/binding-hs/stack.yaml index f51bdc9..ea52f7d 100644 --- a/binding-hs/stack.yaml +++ b/binding-hs/stack.yaml @@ -1,4 +1,4 @@ -resolver: lts-8.4 +resolver: lts-11.13 packages: - '.' extra-deps: []