Spaces:
Runtime error
Runtime error
Initial commit
Browse files- app.py +122 -0
- nim_game_env.py +91 -0
- nim_gpt_functions.py +72 -0
- requirements.txt +5 -0
app.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from nim_game_env import NimGameEnv
|
| 3 |
+
from nim_gpt_functions import plan_move, execute_move
|
| 4 |
+
|
| 5 |
+
# Initial value for both the temperature state and the "GPT Temperature" slider.
TEMPERATURE_DEFAULT = 0.5
# Pile sizes handed to NimGameEnv whenever an environment is created.
PILES_DEFAULT = [3, 5, 7]
# Speaker labels used as chat-transcript prefixes and in the game-over banner.
HUMAN_STR = "Human"
AI_STR = "AI"
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def reset_game(chat_history, nim_game_env):
    """Start a brand-new game and return cleared values for the UI.

    Both arguments are ignored: they are only present so the signature matches
    the inputs the Gradio event handlers pass in. Fresh objects are always
    created.

    Returns
    -------
    tuple
        ``(history, history, message, board_html, env)`` where ``history`` is
        one new empty list (returned twice), ``message`` is an empty string,
        ``board_html`` is the rendered starting position and ``env`` is a new
        ``NimGameEnv``.
    """
    fresh_env = NimGameEnv(PILES_DEFAULT)
    # Only the pile list is needed here; the textual observation is unused.
    _, starting_piles = fresh_env.reset()
    board_html = generate_game_state_ascii_art(starting_piles, False, 0, "")
    empty_history = []
    return empty_history, empty_history, "", board_html, fresh_env
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def generate_game_state_ascii_art(piles, done, reward, player):
    """Render the board (or the game-over banner) as a ``<pre>`` HTML snippet.

    Parameters
    ----------
    piles: sequence of int
        Number of sticks in each pile. Any number of piles is supported; they
        are labelled "A", "B", "C", ... in order.
    done: bool
        When true, the game-over banner is rendered instead of the piles.
    reward:
        Unused; kept so existing callers keep working.
    player: str
        Name of the winner, only used when ``done`` is true.

    Returns
    -------
    str
        The rendered text wrapped in ``<pre>...</pre>``.
    """
    if done:
        ascii_art = "Game Over, " + player + " wins!"
    else:
        rows = [
            f"Pile {chr(ord('A') + index)}: {'|' * stick_count}"
            for index, stick_count in enumerate(piles)
        ]
        # The " \n" separator (with its leading space) reproduces the original
        # three-pile layout exactly.
        ascii_art = " \n".join(rows)
    return "<pre>" + ascii_art + "</pre>"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def send_chat_msg(inp, chat_history, nim_game_env, temperature, openai_api_key):
    """Play one round: apply the human's move, then let the AI answer.

    Parameters
    ----------
    inp: str
        The human's move in natural language.
    chat_history: list or None
        List of ``(human_text, ai_text)`` tuples shown in the chatbot.
    nim_game_env:
        The game environment that ``execute_move`` steps.
    temperature: float
        Sampling temperature forwarded to ``plan_move``.
    openai_api_key: str
        API key forwarded to the LLM helpers.

    Returns
    -------
    tuple
        ``(chat_history, chat_history, html)`` where ``html`` is either a
        warning, the rendered board, or the game-over banner.
    """
    if not openai_api_key:
        warning_msg = "<pre>Please paste your OpenAI API key (see https://beta.openai.com)</pre>"
        return chat_history, chat_history, warning_msg

    # Strip before validating so a whitespace-only message is rejected too.
    inp = (inp or "").strip()
    if not inp:
        warning_msg = "<pre>Please enter a move</pre>"
        return chat_history, chat_history, warning_msg

    chat_history = chat_history or []

    text_obs, observation, reward, done, _ = execute_move(inp, nim_game_env, openai_api_key)

    if done:
        if reward == 1:
            # The human's move emptied the last pile.
            output = "Good game!"
            winner = HUMAN_STR
        else:
            # execute_move reports a rejected move as done with reward 0; the
            # human forfeits and the rejection text is shown as the AI's reply.
            output = text_obs
            winner = AI_STR
    else:
        output = plan_move(text_obs, temperature, openai_api_key)
        text_obs, observation, reward, done, _ = execute_move(output, nim_game_env, openai_api_key)
        # If this ended the game, the AI only wins when its move was accepted
        # (reward 1). A rejected AI move (reward 0) hands the win to the human.
        # The value is ignored by the renderer while the game is still running.
        winner = AI_STR if reward == 1 else HUMAN_STR

    ascii_art = generate_game_state_ascii_art(observation, done, reward, winner)
    chat_history.append((HUMAN_STR + ": " + inp, AI_STR + ": " + output))
    return chat_history, chat_history, ascii_art
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def update_foo(widget, state):
    """Return the value to store in a state holder after a widget changes.

    Parameters
    ----------
    widget:
        The widget's current value.
    state:
        The previously stored value.

    Returns
    -------
    The widget value, unless it is ``None`` or an empty string, in which case
    the previous ``state`` is kept.
    """
    # Only "no value" (None / empty text) keeps the old state. A plain
    # truthiness test would also discard 0.0, which is a legitimate
    # temperature-slider setting.
    if widget is None or widget == "":
        return state
    return widget
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# Build the Gradio UI; the CSS sets a light-gray background on the page container.
block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
with block as nim_game:
    # State holders passed as inputs/outputs of the event handlers below.
    temperature_state = gr.State(TEMPERATURE_DEFAULT)
    openai_api_key_state = gr.State()
    history_state = gr.State()
    nim_game_env_state = gr.State(NimGameEnv(PILES_DEFAULT))

    # Top row: rendered board, title, and the API-key input.
    with gr.Row():
        game_state_html = gr.Markdown()
        title = gr.Markdown("""<h3><center>NimGPT-3.5</center></h3>""")
        openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key",
                                            show_label=False, lines=1, type='password')

    chatbot = gr.Chatbot()

    # Move entry row.
    # NOTE(review): `.style(...)` was deprecated in later Gradio releases -- confirm it
    # still exists in the Gradio version this app is deployed with.
    with gr.Row():
        message_tb = gr.Textbox(label="What's your move?",
                                placeholder="I'll take 2 sticks from pile A")
        send_btn = gr.Button(value="Send", variant="secondary").style(full_width=False)

    # Example moves, reset button and temperature control.
    with gr.Row():
        gr.Examples(
            examples=["Three sticks from the second pile",
                      "From pile C remove 2 sticks"],
            inputs=message_tb
        )
        reset_btn = gr.Button(value="Reset Game", variant="secondary").style(full_width=False)
        temperature_slider = gr.Slider(label="GPT Temperature", value=TEMPERATURE_DEFAULT, minimum=0.0, maximum=1.0,
                                       step=0.1)

    # Clicking "Send" and submitting the textbox both play one round via send_chat_msg.
    send_btn.click(send_chat_msg, inputs=[message_tb, history_state, nim_game_env_state, temperature_state,
                                          openai_api_key_state],
                   outputs=[chatbot, history_state, game_state_html])
    message_tb.submit(send_chat_msg, inputs=[message_tb, history_state, nim_game_env_state, temperature_state,
                                             openai_api_key_state],
                      outputs=[chatbot, history_state, game_state_html])
    # reset_game runs both on the reset button and when the page loads.
    reset_btn.click(reset_game, inputs=[history_state, nim_game_env_state],
                    outputs=[chatbot, history_state, message_tb, game_state_html, nim_game_env_state])
    nim_game.load(reset_game, inputs=[history_state, nim_game_env_state],
                  outputs=[chatbot, history_state, message_tb, game_state_html, nim_game_env_state])

    gr.Markdown("""<center><a href="https://en.wikipedia.org/wiki/Nim" target="new">
Nim is one of the first-ever electronic computerized games</a></center>""")

    gr.HTML("<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>")

    # Mirror the API-key textbox and the temperature slider into their state holders.
    openai_api_key_textbox.change(update_foo,
                                  inputs=[openai_api_key_textbox, openai_api_key_state],
                                  outputs=[openai_api_key_state])

    temperature_slider.change(update_foo,
                              inputs=[temperature_slider, temperature_state],
                              outputs=[temperature_state])

# Start the app when the module is executed.
block.launch(debug=False)
|
nim_game_env.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC
|
| 2 |
+
|
| 3 |
+
import gymnasium as gym
|
| 4 |
+
from gymnasium import spaces
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class NimGameEnv(gym.Env, ABC):
    """Custom environment for a simple Nim game.

    In this game, there are two players and a number of piles of sticks.
    Each turn, a player chooses one pile and removes one or more sticks from it.
    The move that empties the last pile ends the game and earns a reward of 1
    (the app in this project treats the player who made that move as the winner).

    The observation is the list of stick counts, one entry per pile.
    The action is a pair ``(pile_index, num_sticks)``: the zero-based pile and
    the number of sticks to remove from it.
    """

    def __init__(self, starting_stick_piles=None):
        """Create an environment.

        Parameters
        ----------
        starting_stick_piles: sequence of int, optional
            Initial stick count of each pile. Defaults to ``[3, 5, 7]``.
        """
        if starting_stick_piles is None:
            starting_stick_piles = [3, 5, 7]
        # Keep a private copy so later changes to the caller's list (or to the
        # live piles) can never alter the starting position.
        self.starting_stick_piles = list(starting_stick_piles)
        self.num_piles = len(self.starting_stick_piles)
        self.max_stones = max(self.starting_stick_piles)
        self.piles = self._init_piles()
        self.current_player = 0
        self.action_space = spaces.MultiDiscrete([self.num_piles, self.max_stones + 1])
        self.observation_space = spaces.MultiDiscrete([self.max_stones + 1] * self.num_piles)

    def step(self, action):
        """Take a step in the environment.

        Parameters
        ----------
        action: tuple
            The action taken by the player, represented as a tuple of the chosen
            pile index and the number of sticks to remove.

        Returns
        -------
        observation: list
            The current number of sticks in each pile (the environment's live list).
        reward: int
            1 if this move emptied the last pile, otherwise 0.
        done: bool
            Whether the game has ended.
        info: dict
            Always an empty dict.

        Raises
        ------
        ValueError
            If the action is not a pair or is not a legal move.
        """
        # Validate the action
        if not self._is_valid_action(action):
            raise ValueError("Invalid action")

        # Update the piles
        pile, num_stones = action
        self.piles[pile] -= num_stones

        # Determine if the game has ended
        done = self._is_game_over()

        # Calculate the reward
        reward = self._calculate_reward()

        # Switch the current player
        self.current_player = (self.current_player + 1) % 2
        return self.piles, reward, done, {}

    def reset(self):
        """Reset the environment to the initial state.

        Returns
        -------
        text_observation: str
            A sentence listing the stick count of every pile.
        piles: list
            The freshly initialised pile list.
        """
        self.piles = self._init_piles()
        self.current_player = 0
        text_observation = "The piles contain " + ", ".join(str(x) for x in self.piles) + " sticks."
        return text_observation, self.piles

    def _init_piles(self):
        """Return a fresh copy of the configured starting piles."""
        return list(self.starting_stick_piles)

    def _generate_random_stones(self):
        """Generate a random number of stones (between 1 and max_stones inclusive)."""
        return np.random.randint(1, self.max_stones + 1)

    def _is_valid_action(self, action):
        """Determine if an action is valid.

        A valid action names an existing pile and removes at least one stick,
        but no more than that pile currently holds.
        """
        pile, num_stones = action
        return 0 <= pile < self.num_piles and 0 < num_stones <= self.max_stones and num_stones <= self.piles[pile]

    def _is_game_over(self):
        """Determine if the game has ended (every pile is empty)."""
        return all(pile == 0 for pile in self.piles)

    def _calculate_reward(self):
        """Calculate the reward for the current step: 1 once all piles are empty, else 0."""
        return 1 if self._is_game_over() else 0
|
nim_gpt_functions.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain import OpenAI
|
| 2 |
+
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
|
| 3 |
+
from langchain.chains import LLMChain
|
| 4 |
+
|
| 5 |
+
# Template that renders one few-shot example as an "Input: ... / Output: ..." pair.
# It is the example_prompt of both few-shot prompts defined in this module.
EXAMPLES_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["input", "output"],
    template="Input: {input}\nOutput: {output}"
)
|
| 9 |
+
|
| 10 |
+
# Few-shot examples for move planning: textual game state -> natural-language move.
# NOTE(review): the "2, 5, 7" state appears twice with different outputs, and the
# second of those outputs reads "five stick" (singular) -- confirm whether both are intended.
# NOTE(review): these inputs have no trailing period, while the observation text built
# elsewhere in this project ends with "sticks." -- confirm the mismatch is harmless.
PLAN_MOVE_PROMPT_EXAMPLES = [
    {"input": "The piles contain 3, 5, 7 sticks", "output": "I'll take one stick from pile A"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take one stick from pile B"},
    {"input": "The piles contain 2, 5, 7 sticks", "output": "I'll take five stick from pile B"},
    {"input": "The piles contain 1, 2, 3 sticks", "output": "I'll take two sticks from pile C"},
    {"input": "The piles contain 0, 2, 3 sticks", "output": "I'll take one stick from pile C"},
    {"input": "The piles contain 0, 2, 0 sticks", "output": "I'll take two sticks from pile B"},
]

# Prompt used by plan_move: game rules, the examples above, then the current
# game state supplied as {text_game_state}.
PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
    examples=PLAN_MOVE_PROMPT_EXAMPLES,
    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
    prefix="Nim is a two-player game of strategy in which players take turns removing objects from separate piles. "
           "The goal of the game is to remove the last sticks from a pile when the other piles contain 0 sticks. Each "
           "of these inputs represent a game state. For each of these game states please express a logical move that "
           "consists of taking some number of sticks from a pile. You may not take any sticks from a pile that "
           "contains 0 sticks.",
    suffix="Input: {text_game_state}\nOutput:",
    input_variables=["text_game_state"],
    example_separator="\n\n"
)
|
| 31 |
+
|
| 32 |
+
# Few-shot examples for move parsing: natural-language move -> "pile_index,count",
# where pile A / "first pile" is index 0.
EXEC_MOVE_PROMPT_EXAMPLES = [
    {"input": "I'll take two sticks from pile A", "output": "0,2"},
    {"input": "I'll take 3 sticks from the first pile", "output": "0,3"},
    {"input": "I'll take two sticks from pile C", "output": "2,2"},
    {"input": "I'll take one stick from the third pile", "output": "2,1"},
    {"input": "From pile B remove 2 sticks", "output": "1,2"},
    {"input": "I'll take the last stick from pile C", "output": "2,1"},
]

# Prompt used by execute_move: instruction, the examples above, then the move
# to translate supplied as {move_to_express}.
EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES = FewShotPromptTemplate(
    examples=EXEC_MOVE_PROMPT_EXAMPLES,
    example_prompt=EXAMPLES_PROMPT_TEMPLATE,
    prefix="Express every input as two numbers separated by a comma, where the first number is the zero index pile "
           "number and the second number is the number of sticks to remove.",
    suffix="Input: {move_to_express}\nOutput:",
    input_variables=["move_to_express"],
    example_separator="\n\n"
)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def plan_move(text_game_state, temperature, api_key):
    """Ask the LLM to propose a move for the given game state.

    Parameters
    ----------
    text_game_state: str
        Sentence describing the piles; substituted into the plan-move prompt.
    temperature: float
        Sampling temperature for the completion.
    api_key: str
        OpenAI API key used for the request.

    Returns
    -------
    The text produced by running the chain on the plan-move prompt.
    """
    chain = LLMChain(
        llm=OpenAI(model_name='text-davinci-003', temperature=temperature, max_tokens=100,
                   openai_api_key=api_key),
        prompt=PLAN_MOVE_PROMPT_FROM_STRING_EXAMPLES,
        verbose=False,
    )
    return chain.run({'text_game_state': text_game_state})
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def execute_move(move_to_express, nim_game_env, api_key):
    """Translate a natural-language move into an action and apply it to the game.

    Parameters
    ----------
    move_to_express: str
        The move in natural language, e.g. "I'll take two sticks from pile A".
    nim_game_env:
        Environment whose ``step`` method is called with the parsed action.
    api_key: str
        OpenAI API key used for the request.

    Returns
    -------
    tuple
        ``(text_observation, observation, reward, done, info)``. When the move
        cannot be parsed or the environment rejects it, the result is
        ``("Invalid move!", [0, 0, 0], 0, True, None)``.
    """
    llm = OpenAI(model_name='text-davinci-003', temperature=0.0, max_tokens=10,
                 openai_api_key=api_key)
    llm_chain = LLMChain(llm=llm, prompt=EXEC_MOVE_PROMPT_FROM_STRING_EXAMPLES, verbose=False)
    step_tuple_str = llm_chain.run({'move_to_express': move_to_express})

    try:
        # Parsing sits inside the try: the completion is free text, so int()
        # can raise ValueError just like an illegal action does in step().
        step_tuple = tuple(int(x) for x in step_tuple_str.split(','))
        observation, reward, done, info = nim_game_env.step(step_tuple)
    except ValueError:
        return "Invalid move!", [0, 0, 0], 0, True, None

    text_observation = "The piles contain " + ", ".join(str(x) for x in observation) + " sticks."
    return text_observation, observation, reward, done, info
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai
|
| 2 |
+
gradio
|
| 3 |
+
numpy
|
| 4 |
+
langchain
|
| 5 |
+
gymnasium
|