Implementing REINFORCE Policy Gradient for CartPole-v1
############################### LAB 9 #####################
import gymnasium as gym
import numpy as np
import tensorflow as tf
# Create the environment
env = gym.make("CartPole-v1")

# Define a simple neural network model for the policy.
# The input width is taken from the environment's observation space and the
# output width from its action space, so the layer sizes follow the env.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(
        16,
        activation="relu",
        input_shape=(env.observation_space.shape[0],),
    ),
    # Softmax output: one probability per available action.
    tf.keras.layers.Dense(env.action_space.n, activation="softmax"),
])

# Define the optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
# Function to choose an action based on the current policy
def choose_action(state):
"""
Chooses an action based on the probabilities output by the policy model.
Args:
state (np.Array): The current observation/state from... Continue reading "Implementing REINFORCE Policy Gradient for CartPole-v1" »
English with a size of 3.14 KB