Hello everyone,
I'm sorry in advance if my question is too broad. I'm doing my thesis on AI and machine learning, and I'm building a model that learns to play chess with reinforcement learning (DDQN). I currently have two issues with my code:
- Whenever I write "from x import y", Visual Studio tells me it can't find the x file (yellow underlines), but when I actually run the code the import works, so I'm confused: is this just a Visual Studio thing, or is it a real error and not all of my imports are being picked up? PS: all the code files are in the same directory and I have an __init__.py file in that directory as well; a minimal sketch of the layout is below.
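To make the setup concrete, this is roughly what the layout and the imports look like (the folder name project/ is just a placeholder, not my real folder name):

# project/            <- all files live in this one folder
#   __init__.py
#   rook.py           <- defines generate_rook_moves(board, square, color)
#   pawn.py           <- defines generate_pawn_moves(board, square, color)
#   chess_env.py      <- the environment pasted below
#   train.py          <- the training script pasted below
#
# Inside chess_env.py the imports are written as plain absolute imports:
from rook import generate_rook_moves    # underlined in yellow by the editor
from pawn import generate_pawn_moves    # ...but resolves fine when the script is run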
- I tried refining my step method, which is responsible for taking steps, but it kept giving a -10 reward on every iteration. Since the last modification I now get the error "Error in step method: 'Board' object is not subscriptable" and it still gives -10s, and I don't know what to make of it, so a bit of insight would be very helpful. A small sketch of what I think the error means is below.
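If I understand the error correctly, it comes from python-chess: chess.Board does not support dict-style indexing, so squares have to be read through its own API. A minimal sketch of my assumption (the square e2 is just an example):

import chess

board = chess.Board()

# piece = board["e2"]             # raises: TypeError: 'Board' object is not subscriptable
piece = board.piece_at(chess.E2)  # the python-chess way of reading a square
print(piece)                      # prints P (the white pawn on e2)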
I will paste the code of the chess environment and the training script, without the piece-move code, to save you space and time; a stub showing the shape of those omitted helpers is below.
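For context, each piece module exposes one generator function with the same signature, used by generate_valid_moves further down. This stub only shows the shape I'm using, not the real logic:

def generate_rook_moves(board, square, color):
    # Returns a list of destination squares (e.g. ['a4', 'a5']) that the rook on
    # `square` could move to for side `color` ('w' or 'b'). Real implementation omitted.
    return []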
chess_env:
import torch
import chess
import chess.engine
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from rook import generate_rook_moves
from pawn import generate_pawn_moves
from bishop import generate_bishop_moves
from knight import generate_knight_moves
from queen import generate_queen_moves
from king import generate_king_moves
# Define QNetwork here
class QNetwork(torch.nn.Module):
    def __init__(self, input_size, output_size):
        super(QNetwork, self).__init__()
        self.fc1 = torch.nn.Linear(input_size, 128)
        self.relu1 = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(128, 64)
        self.relu2 = torch.nn.ReLU()
        self.fc3 = torch.nn.Linear(64, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x
class ChessEnvironment:
    def __init__(self, q_network, actions):
        self.q_network = q_network
        self.board = chess.Board()
        self.turn = 'w'
        self.actions = self.generate_all_moves()

    def initialize_board(self):
        return {
            'a1': 'wr', 'b1': 'wn', 'c1': 'wb', 'd1': 'wq', 'e1': 'wk', 'f1': 'wb', 'g1': 'wn', 'h1': 'wr',
            'a2': 'wp', 'b2': 'wp', 'c2': 'wp', 'd2': 'wp', 'e2': 'wp', 'f2': 'wp', 'g2': 'wp', 'h2': 'wp',
            'a8': 'br', 'b8': 'bn', 'c8': 'bb', 'd8': 'bq', 'e8': 'bk', 'f8': 'bb', 'g8': 'bn', 'h8': 'br',
            'a7': 'bp', 'b7': 'bp', 'c7': 'bp', 'd7': 'bp', 'e7': 'bp', 'f7': 'bp', 'g7': 'bp', 'h7': 'bp',
        }

    def generate_all_moves(self):
        # Generate all possible legal moves in UCI format
        return [move.uci() for move in self.board.legal_moves]

    def reset(self):
        self.board.reset()
        self.turn = 'w'
        return self.get_state()

    def get_state(self):
        # Convert board state to a 1D tensor of length 64
        state = np.zeros(64, dtype=int)
        for square, piece in self.board.piece_map().items():
            state[square] = piece.piece_type if piece.color == chess.WHITE else -piece.piece_type
        return state
        # return torch.tensor(board_state, dtype=torch.float32).unsqueeze(0)  # Add batch dimension
    def legal_moves(self):
        legal_moves = []
        for square, piece in self.board.items():
            if piece[0] == self.turn:
                moves = self.generate_valid_moves(square, piece)
                legal_moves.extend(self.filter_moves_check(square, moves))
        return legal_moves

    def generate_valid_moves(self, square, piece):
        if piece[1] == 'r':
            return generate_rook_moves(self.board, square, piece[0])
        elif piece[1] == 'p':
            return generate_pawn_moves(self.board, square, piece[0])
        elif piece[1] == 'b':
            return generate_bishop_moves(self.board, square, piece[0])
        elif piece[1] == 'kn':
            return generate_knight_moves(self.board, square, piece[0])
        elif piece[1] == 'k':
            return generate_king_moves(self.board, square, piece[0])
        elif piece[1] == 'q':
            return generate_queen_moves(self.board, square, piece[0])
        return []

    def filter_moves_check(self, square, moves):
        valid_moves = []
        for move in moves:
            self.make_move(square, move)
            if not self.is_check(self.turn):
                valid_moves.append(move)
            self.undo_move(square, move)
        return valid_moves

    def make_move(self, source_square, destination_square):
        self.board[destination_square] = self.board[source_square]
        del self.board[source_square]
        self.turn = 'b' if self.turn == 'w' else 'w'

    def undo_move(self, source_square, destination_square):
        self.board[source_square] = self.board[destination_square]
        del self.board[destination_square]
        self.turn = 'b' if self.turn == 'w' else 'w'

    def is_check(self, color):
        king_square = None
        for square, piece in self.board.items():
            if piece == color + 'k':
                king_square = square
                break
        if king_square:
            for square, piece in self.board.items():
                if piece[0] != color:
                    if king_square in self.generate_valid_moves(square, piece):
                        return True
        return False

    def is_checkmate(self):
        return self.is_check(self.turn) and not self.legal_moves()

    def is_stalemate(self):
        return not self.is_check(self.turn) and not self.legal_moves()

    def is_insufficient_material(self):
        pieces = [piece[1] for piece in self.board.values()]
        return pieces in [['k', 'k'], ['k', 'kn', 'k'], ['k', 'kb', 'k']]
    def step(self, action):
        move = self.actions[action]
        source_square = move[:2]
        destination_square = move[2:]
        try:
            self.make_move(source_square, destination_square)
        except Exception as e:
            print(f"Error in step method: {e}")
            return self.get_state(), -10, True
        next_state = self.get_state()
        reward = self.calculate_reward(source_square, destination_square)
        done = self.is_checkmate() or self.is_stalemate() or self.is_insufficient_material()
        if self.is_checkmate():
            reward += 100
        elif self.is_stalemate() or self.is_insufficient_material():
            reward += 0
        return next_state, reward, done

    # Previous version of step, kept for reference:
    """try:
        # Extract source and destination squares from the action index
        move_uci = self.actions[action]
        move = chess.Move.from_uci(move_uci)
        # Check if the move is legal
        if move not in self.board.legal_moves:
            return self.get_state(), -10, True  # Invalid move with high negative reward, and end episode
        # Make the move on the board
        self.make_move(move)
        # Get the new state after the move
        next_state = self.get_state()
        # Calculate reward based on the board state after the move
        reward = self.calculate_reward()
        # Check if the game is done (checkmate, stalemate, or insufficient material)
        done = self.board.is_checkmate() or self.board.is_stalemate() or self.is_insufficient_material()
        if self.board.is_checkmate():
            reward += 100  # Reward for achieving checkmate
        elif self.board.is_stalemate() or self.is_insufficient_material():
            reward += 0  # No reward for stalemate or insufficient material
        # Update the actions list
        self.actions = self.generate_all_moves()
        return next_state, reward, done
    except Exception as e:
        print(f"Error in step method: {e}")
        return self.get_state(), -10, True"""
    def calculate_reward(self, source_square, destination_square):
        # Reward function for specific events
        reward = 0
        # Reward for capturing a piece
        if destination_square in self.board:
            captured_piece = self.board[destination_square]
            piece_value = {'p': 1, 'n': 3, 'b': 3, 'r': 5, 'q': 9, 'k': 0}
            reward += piece_value[captured_piece[1]]
        # Reward for putting the opponent in check
        if self.is_check(self.turn):
            reward += 5
        # Penalize for each move to encourage faster resolution
        reward -= 1
        return reward
train:
import torch
import torch.nn as nn
import torch.optim as optim
import chess
import chess.engine
import random
import numpy as np
from chess_env import ChessEnvironment, QNetwork
# Define the actions (all possible moves in UCI notation)
actions = [move for move in chess.Board().legal_moves]
# Initialize Q-Network with the correct input and output sizes
input_size = 64 # Since the state is represented by an array of 64 integers
output_size = len(actions) # Number of possible actions
q_network = QNetwork(input_size=input_size, output_size=output_size)
env = ChessEnvironment(q_network, actions)
# Hyperparameters
gamma = 0.99 # Discount factor for future rewards
epsilon = 1.0 # Initial exploration rate
epsilon_min = 0.01 # Minimum exploration rate
epsilon_decay = 0.995 # Decay rate for exploration probability
learning_rate = 0.001 # Learning rate for the optimizer
optimizer = optim.Adam(env.q_network.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
# Training loop
num_episodes = 1000
for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0
    done = False

    while not done:
        if random.uniform(0, 1) < epsilon:
            action = random.randint(0, len(env.actions)-1)  # Explore: select a random action
        else:
            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            q_values = env.q_network(state_tensor)
            action = torch.argmax(q_values).item()  # Exploit: select the action with max Q-value

        next_state, reward, done = env.step(action)  # Take a step in the environment
        total_reward += reward

        # Update Q-values using Bellman equation
        next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)
        q_values_next = env.q_network(next_state_tensor)
        target = reward + (gamma * torch.max(q_values_next).item() * (1 - done))

        # Convert state to tensor
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        q_values = env.q_network(state_tensor)

        # Make a copy of the current Q-values to modify the target
        target_f = q_values.clone().detach()

        # Ensure the action index is within bounds
        if action >= len(env.actions):
            raise ValueError(f"Action index {action} out of bounds for actions list of length {len(env.actions)}")
        target_f[0][action] = target

        loss = criterion(q_values, target_f)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        state = next_state

    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    print(f"Episode {episode + 1}/{num_episodes}, Total Reward: {total_reward}")