{
"all_wheels_on_track": Boolean, # flag to indicate if the vehicle is on the track
"x": float, # vehicle's x-coordinate in meters
"y": float, # vehicle's y-coordinate in meters
"distance_from_center": float, # distance in meters from the track center
"is_left_of_center": Boolean, # Flag to indicate if the vehicle is on the left side to the track center or not.
"heading": float, # vehicle's yaw in degrees
"progress": float, # percentage of track completed.
"steps": int, # number steps completed
"speed": float, # vehicle's speed in meters per second (m/s)
"steering_angle": float, # vehicle's steering angle in degrees
"track_width": float, # width of the track
"waypoints": [[float, float], … ], # list of [x,y] as milestones along the track center
"closest_waypoints": [int, int] # indices of the two nearest waypoints.
}
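For reference, here is a minimal sketch of a reward function built only from the fields above: it rewards the car for staying close to the track center, using distance_from_center and track_width. The three markers at 10%, 25%, and 50% of the track width are illustrative choices, not part of the interface.

def centerline_reward(params):
    ''' Sketch: reward staying close to the track center '''
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    # Illustrative markers at 10%, 25%, and 50% of the track width
    marker_1 = 0.1 * track_width
    marker_2 = 0.25 * track_width
    marker_3 = 0.5 * track_width
    if distance_from_center <= marker_1:
        reward = 1.0
    elif distance_from_center <= marker_2:
        reward = 0.5
    elif distance_from_center <= marker_3:
        reward = 0.1
    else:
        reward = 1e-3  # likely off track
    return reward

Returning a small positive value such as 1e-3 rather than 0 for the worst case mirrors the convention used in the examples below.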
Example 1: Using a custom reward function with random actions
import os
import sys
import time
import logging

from DeepRacer_gym import CustomRewardWrapper
from DeepRacer_gym import DeepRacerActionWrapper
import DeepRacer_gym as deepracer

LOG = logging.getLogger()

# Define your custom reward function
def reward_fn(params):
    ''' Example of using all_wheels_on_track and speed '''
    # Read input variables
    all_wheels_on_track = params['all_wheels_on_track']
    speed = params['speed']
    # Set the speed threshold based on your action space
    SPEED_THRESHOLD = 1.0
    if not all_wheels_on_track:
        # Penalize if the car goes off track
        reward = 1e-3
    elif speed < SPEED_THRESHOLD:
        # Penalize if the car goes too slowly
        reward = 0.5
    else:
        # High reward if the car stays on track and goes fast
        reward = 1.0
    return reward

# Create environment
env = deepracer.make('NewYorkCity-v0')
env = CustomRewardWrapper(env, reward_fn)
env = DeepRacerActionWrapper(env, max_steering_angle=30,
                             steering_angle_granularity=5,
                             max_speed=3,
                             speed_granularity=3)

# Print action space info
print(env.action_space)
action_table = env.action_table()

# Print action table
print('Action number\t\tSteering\t\tSpeed')
for t in action_table:
    print('{}\t\t\t{}\t\t\t{}'.format(t['Action number'], t['Steering'], t['Speed']))

MAXIMUM_STEPS = 1000
state = env.reset()
for step in range(MAXIMUM_STEPS):
    action = env.action_space.sample()  # random sample
    state, reward, done, info = env.step(action)
    LOG.info("[step {:4d}] action: ({:6.2f}, {:6.2f}), speed: {:10.6f}, steering: {:10.2f}, xy: ({:10.6f}, {:10.6f}), all_wheels_on_track: {}, closest_waypoints: {}".format(
        info['steps'], action_table[action]['Speed'], action_table[action]['Steering'], info['speed'], info['steering_angle'], info['x'], info['y'], info['all_wheels_on_track'], info['closest_waypoints']))
env.close()
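If you want a deterministic sanity check instead of random sampling, you can pick an index from the action table printed above. A minimal sketch, assuming the table uses the 'Steering' and 'Speed' keys shown in Example 1 and contains a zero-steering entry:

# Hypothetical replacement for action = env.action_space.sample():
# pick the zero-steering entry with the highest speed (drive straight, fast)
action = max(
    (i for i, t in enumerate(action_table) if t['Steering'] == 0),
    key=lambda i: action_table[i]['Speed'])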
Example 2: Integrating with stable_baselines
import os
import sys
import time
import logging

from DeepRacer_gym import CustomRewardWrapper
from DeepRacer_gym import DeepRacerActionWrapper
import DeepRacer_gym as deepracer

from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common.policies import MlpPolicy

# Define your custom reward function
def reward_fn(params):
    ''' Example of using all_wheels_on_track and speed '''
    # Read input variables
    all_wheels_on_track = params['all_wheels_on_track']
    speed = params['speed']
    # Set the speed threshold based on your action space
    SPEED_THRESHOLD = 1.0
    if not all_wheels_on_track:
        # Penalize if the car goes off track
        reward = 1e-3
    elif speed < SPEED_THRESHOLD:
        # Penalize if the car goes too slowly
        reward = 0.5
    else:
        # High reward if the car stays on track and goes fast
        reward = 1.0
    return reward

# Create environment
env = deepracer.make('NewYorkCity-v0')
env = CustomRewardWrapper(env, reward_fn)
env = DeepRacerActionWrapper(env, max_steering_angle=30,
                             steering_angle_granularity=5,
                             max_speed=3,
                             speed_granularity=3)

MAX_TRAINING_STEPS = 5000
MAX_EVALUATE_STEPS = 1000

# Wrap the environment in a DummyVecEnv, as stable_baselines expects a
# vectorized environment (bound to a new name so the lambda does not
# capture the vectorized wrapper itself)
vec_env = DummyVecEnv([lambda: env])
model = PPO2(MlpPolicy, vec_env, verbose=1)
model.learn(total_timesteps=MAX_TRAINING_STEPS)

states = vec_env.reset()
for step in range(MAX_EVALUATE_STEPS):
    action, _states = model.predict(states)
    # A vectorized env returns one reward, done flag, and info dict per sub-environment
    states, rewards, dones, infos = vec_env.step(action)
    if infos[0]['progress'] >= 99.99:
        print("Track complete")
        break
vec_env.close()
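After training, the stable_baselines model can be saved to disk and reloaded for later evaluation; a minimal sketch, where 'deepracer_ppo2' is an illustrative file name:

# Persist the trained policy and reload it later
model.save('deepracer_ppo2')
model = PPO2.load('deepracer_ppo2')
action, _states = model.predict(states)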