# Flow Chart Generator - With Mermaid.live | [Start Chat](https://gptcall.net/chat.html?data=%7B%22contact%22%3A%7B%22id%22%3A%22uzZ_YXqA5Jt8PZTFzD9xR%22%2C%22flow%22%3Atrue%7D%7D)
Tired of preparing presentations and slides? Automatically package your code or idea into a flowchart, sequence diagram, Gantt chart, or other diagram with this prompt.
Based on the Mermaid online editor at Mermaid.live (https://mermaid.live/).
The results generated by ChatGPT are pasted into the Mermaid.live editor, which renders them as a clean diagram. The input follows the format below:
```
sequenceDiagram
    participant env as Environment
    participant main
    participant rb as ReplayBuffer
    participant agent as DDPG Agent

    main->>agent: Initialize DDPG Agent
    main->>rb: Initialize ReplayBuffer
    loop Episode 1 to 100
        main->>env: Reset environment
        loop Time step 1 to 200
            main->>agent: Select action
            main->>env: Perform action
            env-->>main: next_state, reward, done
            main->>rb: Add experience to ReplayBuffer

            opt Training
                main->>agent: Train
                agent->>rb: Sample from ReplayBuffer
                rb-->>agent: state, action, reward, next_state, done

                agent->>agent: Compute critic_loss and actor_loss
                agent->>agent: Update critic and actor weights
                agent->>agent: Update target networks
            end

            main->>main: Update state and episode_reward
            opt Episode ends
                main->>main: Break
            end
        end
        main->>main: Record episode reward
    end
```
# Prompt
```
Help me to write the logic in the following program into mermaid.live input to generate a sequenceDiagram:
{ code:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import gym

# Hyperparameters
GAMMA = 0.99
TAU = 0.005
BATCH_SIZE = 64
BUFFER_SIZE = 1000000
ACTOR_LR = 0.001
CRITIC_LR = 0.002

class ReplayBuffer:
    def __init__(self, buffer_size):
        self.buffer_size = buffer_size
        self.buffer = []
        self.position = 0

    def add(self, state, action, reward, next_state, done):
        transition = (state, action, reward, next_state, done)
        if len(self.buffer) < self.buffer_size:
            self.buffer.append(transition)
        else:
            self.buffer[self.position] = transition
        self.position = (self.position + 1) % self.buffer_size

    def sample(self, batch_size):
        indices = np.random.choice(len(self.buffer), size=batch_size)
        return [self.buffer[i] for i in indices]

    def __len__(self):
        return len(self.buffer)

class DDPG:
    def __init__(self, state_dim, action_dim, max_action):
        self.actor = self.create_actor(state_dim, action_dim, max_action)
        self.actor_target = self.create_actor(state_dim, action_dim, max_action)
        self.actor_target.set_weights(self.actor.get_weights())

        self.critic = self.create_critic(state_dim, action_dim)
        self.critic_target = self.create_critic(state_dim, action_dim)
        self.critic_target.set_weights(self.critic.get_weights())

        self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=ACTOR_LR)
        self.critic_optimizer = tf.keras.optimizers.Adam(learning_rate=CRITIC_LR)

    def create_actor(self, state_dim, action_dim, max_action):
        inputs = layers.Input(shape=(state_dim,))
        x = layers.Dense(400, activation='relu')(inputs)
        x = layers.Dense(300, activation='relu')(x)
        x = layers.Dense(action_dim, activation='tanh')(x)
        outputs = max_action * x
        return tf.keras.Model(inputs=inputs, outputs=outputs)

    def create_critic(self, state_dim, action_dim):
        state_inputs = layers.Input(shape=(state_dim,))
        action_inputs = layers.Input(shape=(action_dim,))
        x = layers.Concatenate()([state_inputs, action_inputs])
        x = layers.Dense(400, activation='relu')(x)
        x = layers.Dense(300, activation='relu')(x)
        outputs = layers.Dense(1)(x)
        return tf.keras.Model(inputs=[state_inputs, action_inputs], outputs=outputs)

    def train(self, replay_buffer):
        sample = replay_buffer.sample(BATCH_SIZE)
        state, action, reward, next_state, done = list(map(np.array, zip(*sample)))

        with tf.GradientTape() as tape:
            target_actions = self.actor_target(next_state)
            target_q_values = self.critic_target([next_state, target_actions])
            target_values = reward + GAMMA * target_q_values * (1 - done)
            q_values = self.critic([state, action])
            critic_loss = tf.reduce_mean((q_values - target_values) ** 2)
        critic_grads = tape.gradient(critic_loss, self.critic.trainable_variables)
        self.critic_optimizer.apply_gradients(zip(critic_grads, self.critic.trainable_variables))

        with tf.GradientTape() as tape:
            actions = self.actor(state)
            actor_loss = -tf.reduce_mean(self.critic([state, actions]))
        actor_grads = tape.gradient(actor_loss, self.actor.trainable_variables)
        self.actor_optimizer.apply_gradients(zip(actor_grads, self.actor.trainable_variables))

        # Update target networks
        self.update_target_networks()

    def update_target_networks(self):
        actor_weights = self.actor.get_weights()
        actor_target_weights = self.actor_target.get_weights()
        critic_weights = self.critic.get_weights()
        critic_target_weights = self.critic_target.get_weights()

        for i in range(len(actor_weights)):
            actor_target_weights[i] = TAU * actor_weights[i] + (1 - TAU) * actor_target_weights[i]

        for i in range(len(critic_weights)):
            critic_target_weights[i] = TAU * critic_weights[i] + (1 - TAU) * critic_target_weights[i]

        self.actor_target.set_weights(actor_target_weights)
        self.critic_target.set_weights(critic_target_weights)

    def select_action(self, state):
        state = np.expand_dims(state, axis=0)
        return self.actor(state).numpy().flatten()

agent = DDPG(state_dim, action_dim, max_action)
replay_buffer = ReplayBuffer(BUFFER_SIZE)

episode_rewards = []

for episode in range(1, 101):
    state = env.reset()
    episode_reward = 0

    for t in range(1, 201):
        action = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)

        replay_buffer.add(state, action, reward, next_state, done)

        if len(replay_buffer) >= BATCH_SIZE:
            agent.train(replay_buffer)

        state = next_state
        episode_reward += reward

        if done:
            break
}
```
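Note that the snippet in the prompt references `env`, `state_dim`, `action_dim`, and `max_action` without defining them. A minimal sketch of the setup it appears to assume, using a generic continuous-control Gym environment (Pendulum is only an illustrative choice, not part of the original prompt), might look like this:

```
import gym

# Hypothetical setup for the DDPG snippet above; any continuous-action
# environment that follows the classic 4-tuple gym step API would do.
env = gym.make("Pendulum-v1")
state_dim = env.observation_space.shape[0]      # size of the observation vector
action_dim = env.action_space.shape[0]          # size of the action vector
max_action = float(env.action_space.high[0])    # used to scale the tanh actor output
```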
## Welcome Message
Here is the mermaid code for the sequence diagram of the given program:
```
sequenceDiagram
    participant main_program
    participant ReplayBuffer
    participant DDPG
    participant gym
    participant tf.GradientTape
    participant actor_optimizer
    participant critic_optimizer

    main_program->>+ReplayBuffer: Add to buffer
    main_program->>+DDPG: Select action
    DDPG->>+gym: Get next state, reward
    DDPG->>-ReplayBuffer: Sample from buffer
    DDPG->>tf.GradientTape: Compute critic loss
    tf.GradientTape->>-DDPG: Update critic weights
    DDPG->>tf.GradientTape: Compute actor loss
    tf.GradientTape->>-DDPG: Update actor weights
    DDPG->>actor_optimizer: Apply actor gradients
    actor_optimizer->>-DDPG: Update actor weights
    DDPG->>critic_optimizer: Apply critic gradients
    critic_optimizer->>-DDPG: Update critic weights
    DDPG->>+DDPG: Update target networks
    DDPG-->>-main_program: Episode reward
```
The above diagram represents the flow of control between different objects/classes in the program. It starts with the main program sending the state, action, reward, and next_state to the ReplayBuffer object. Then, the DDPG object selects an action based on the current state and sends it to the gym environment to get the next state and reward.

Next, the DDPG object samples a batch of transitions from the ReplayBuffer object and uses them to compute the critic loss and actor loss using GradientTape. The GradientTape object computes the gradients of the loss functions with respect to the trainable variables of the critic and actor models.
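As a minimal illustration of that GradientTape pattern (a toy one-layer model on dummy data, not the prompt's actual critic), the forward pass is recorded inside the tape and the gradients are taken afterwards:

```
import tensorflow as tf

# Toy illustration of the GradientTape pattern described above; the model and
# data here are placeholders, not the DDPG networks from the prompt.
critic = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

x = tf.random.normal((8, 4))        # dummy batch of concatenated (state, action) inputs
target_values = tf.zeros((8, 1))    # dummy TD targets

with tf.GradientTape() as tape:
    q_values = critic(x)                                     # forward pass recorded by the tape
    loss = tf.reduce_mean((q_values - target_values) ** 2)   # mean-squared critic loss
grads = tape.gradient(loss, critic.trainable_variables)      # d(loss)/d(weights)
optimizer.apply_gradients(zip(grads, critic.trainable_variables))
```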
After that, the actor_optimizer and critic_optimizer objects apply the gradients to update the actor and critic models' weights. Finally, the DDPG object updates the target networks using a soft update strategy.
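The "soft update" here is just an exponential moving average of the weights controlled by TAU; a minimal sketch with illustrative NumPy arrays (not the prompt's Keras models) would be:

```
import numpy as np

TAU = 0.005  # same value as in the prompt

def soft_update(online_weights, target_weights, tau=TAU):
    # Move each target weight a small step toward the corresponding online weight.
    return [tau * w + (1.0 - tau) * w_t for w, w_t in zip(online_weights, target_weights)]

online = [np.ones(3)]
target = [np.zeros(3)]
target = soft_update(online, target)   # -> [array([0.005, 0.005, 0.005])]
```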
In the end, the episode reward is sent back to the main program.
## Conversation
**ASSISTANT**: Here is the mermaid code for the sequence diagram of the given program:
```
sequenceDiagram
    participant main_program
    participant ReplayBuffer
    participant DDPG
    participant gym
    participant tf.GradientTape
    participant actor_optimizer
    participant critic_optimizer

    main_program->>+ReplayBuffer: Add to buffer
    main_program->>+DDPG: Select action
    DDPG->>+gym: Get next state, reward
    DDPG->>-ReplayBuffer: Sample from buffer
    DDPG->>tf.GradientTape: Compute critic loss
    tf.GradientTape->>-DDPG: Update critic weights
    DDPG->>tf.GradientTape: Compute actor loss
    tf.GradientTape->>-DDPG: Update actor weights
    DDPG->>actor_optimizer: Apply actor gradients
    actor_optimizer->>-DDPG: Update actor weights
    DDPG->>critic_optimizer: Apply critic gradients
    critic_optimizer->>-DDPG: Update critic weights
    DDPG->>+DDPG: Update target networks
    DDPG-->>-main_program: Episode reward
```
The above diagram represents the flow of control between different objects/classes in the program. It starts with the main program sending the state, action, reward, and next_state to the ReplayBuffer object. Then, the DDPG object selects an action based on the current state and sends it to the gym environment to get the next state and reward.

Next, the DDPG object samples a batch of transitions from the ReplayBuffer object and uses them to compute the critic loss and actor loss using GradientTape. The GradientTape object computes the gradients of the loss functions with respect to the trainable variables of the critic and actor models.

After that, the actor_optimizer and critic_optimizer objects apply the gradients to update the actor and critic models' weights. Finally, the DDPG object updates the target networks using a soft update strategy.

In the end, the episode reward is sent back to the main program.