So there is this game called Arras, and I wanted to try making a CNN for it. I didn't know where to start, so I first wrote code that takes screenshots of about 300 KB each (240p, 15 fps, grayscale) and stacks them in groups of 3, so that anything that moves shows up coloured. That way I get around 300 images every minute or so, roughly 1.4-1.5 MB per second. This is the Python code for it:
import cv2
import numpy as np
import time
from mss import mss
import os
import csv
from pynput import keyboard, mouse
# ===== Settings =====
# Capture parameters.
FPS = 15  # target capture rate, frames per second
STACK_SIZE = 3  # consecutive frames combined into one saved array
OUTPUT_RES = (426, 240)  # width, height (240p)
MONITOR = {
    "top": 0,
    "left": 0,
    "width": 2560,
    "height": 1440,
}  # adjust if needed

# Output locations: the input log and the preview images live under SAVE_DIR.
SAVE_DIR = "recorded_frames"
INPUT_LOG = os.path.join(SAVE_DIR, "recorded_inputs.csv")
PREVIEW_DIR = os.path.join(SAVE_DIR, "preview_frames")

# Create the output folders up front so later writes cannot fail on a
# missing directory.
os.makedirs(SAVE_DIR, exist_ok=True)
os.makedirs(PREVIEW_DIR, exist_ok=True)
# ===== Input State =====
# Shared between the input-listener callbacks (writers) and the recording
# loop (reader).
recording = False  # starts paused
stop_program = False  # set to True to make the recording loop exit
mouse_pos = (0, 0)  # last reported pointer position as (x, y)
keys_pressed = set()  # string names of the keys currently held down
mouse_buttons = set()  # string names of the mouse buttons currently held down
def on_press(key):
    """Keyboard callback: handle the F8/F9 hotkeys and track held keys.

    F8 sets ``recording`` to True. F9 sets ``stop_program`` to True and
    returns ``False`` so that the keyboard listener stops. Any other key is
    added to ``keys_pressed`` under its ``str(key)`` name.

    The previous ``try/except AttributeError`` wrapper was removed: nothing
    in this body raises ``AttributeError``, and its handler only repeated
    the ``else`` branch.
    """
    global recording, stop_program
    if key == keyboard.Key.f8:  # start recording
        print(" Recording started!")
        recording = True
    elif key == keyboard.Key.f9:  # stop recording
        print("⏹ Recording stopped!")
        stop_program = True
        return False  # stop keyboard listener
    else:
        keys_pressed.add(str(key))
def on_release(key):
    """Keyboard callback: remove a released key from ``keys_pressed``.

    ``on_press`` stores every key as ``str(key)``, so the same form must be
    used here. The earlier code discarded ``key.char`` for character keys,
    which never matched the stored string and left those keys marked as
    held for the rest of the session.
    """
    keys_pressed.discard(str(key))
def on_click(x, y, button, pressed):
    """Mouse callback: keep ``mouse_buttons`` in sync with button state.

    The button is stored under its ``str(button)`` name; it is added when
    ``pressed`` is true and removed otherwise. ``x`` and ``y`` are unused.
    """
    label = str(button)
    update = mouse_buttons.add if pressed else mouse_buttons.discard
    update(label)
def on_move(x, y):
    """Mouse callback: remember the latest pointer position as ``(x, y)``."""
    global mouse_pos
    mouse_pos = x, y
# ===== Recorder =====
def record():
    """Capture stacked grayscale screenshots and the matching input state.

    Starts the keyboard and mouse listeners, then idles until ``recording``
    becomes true. While recording, the ``MONITOR`` region is grabbed at up
    to ``FPS`` frames per second, converted to grayscale and resized to
    ``OUTPUT_RES``. Every ``STACK_SIZE`` consecutive frames are stacked
    along the last axis and saved as ``frame_NNNNN.npy`` in ``SAVE_DIR``,
    and one row (frame id, held keys, mouse position, held mouse buttons)
    is written to ``INPUT_LOG``. Every 20th stack also saves a PNG preview
    of its last frame to ``PREVIEW_DIR``.

    The loop ends when ``stop_program`` is set or when ``q`` is pressed in
    the preview window. The listeners, the preview window, the screen
    grabber and the log file are released even if an error occurs.
    """
    global stop_program
    frame_interval = 1 / FPS
    frame_count = 0
    frame_stack = []
    last_time = time.perf_counter()

    # One log handle for the whole session; UTF-8 so that non-ASCII key
    # names cannot fail on a platform-specific default encoding.
    with mss() as sct, open(
        INPUT_LOG, "w", newline="", encoding="utf-8"
    ) as log_file:
        writer = csv.writer(log_file)
        writer.writerow(
            ["frame_id", "keys_pressed", "mouse_x", "mouse_y", "mouse_buttons"]
        )
        log_file.flush()

        # Start input listeners
        kb_listener = keyboard.Listener(on_press=on_press, on_release=on_release)
        ms_listener = mouse.Listener(on_click=on_click, on_move=on_move)
        kb_listener.start()
        ms_listener.start()
        print("Press F8 to start recording, F9 to stop.")

        try:
            while not stop_program:
                if not recording:
                    time.sleep(0.05)
                    continue

                # Sleep out the rest of the frame interval instead of
                # spinning, which would keep a CPU core fully busy.
                remaining = frame_interval - (time.perf_counter() - last_time)
                if remaining > 0:
                    time.sleep(remaining)
                last_time = time.perf_counter()

                # Screenshot
                img = np.array(sct.grab(MONITOR))
                gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
                resized = cv2.resize(gray, OUTPUT_RES, interpolation=cv2.INTER_AREA)
                frame_stack.append(resized)

                if len(frame_stack) == STACK_SIZE:
                    frame_count += 1
                    # One channel per captured frame, oldest first.
                    stacked = np.stack(frame_stack, axis=-1)

                    # Save frames
                    frame_file = os.path.join(
                        SAVE_DIR, f"frame_{frame_count:05d}.npy"
                    )
                    np.save(frame_file, stacked)

                    # Save input log; flush so rows survive an abrupt exit.
                    writer.writerow([
                        frame_count,
                        list(keys_pressed),
                        mouse_pos[0],
                        mouse_pos[1],
                        list(mouse_buttons),
                    ])
                    log_file.flush()

                    # Save preview every 20th stack
                    if frame_count % 20 == 0:
                        preview_file = os.path.join(
                            PREVIEW_DIR, f"preview_{frame_count:05d}.png"
                        )
                        cv2.imwrite(preview_file, frame_stack[-1])
                        print(f"Preview saved: {preview_file}")

                    print(f"Saved {frame_file} with inputs")
                    frame_stack = []

                # Show preview in a window
                cv2.imshow("Recording (240p grayscale)", resized)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    stop_program = True
                    break
        finally:
            cv2.destroyAllWindows()
            kb_listener.stop()
            ms_listener.stop()


if __name__ == "__main__":
    record()
So that takes the screenshots, and to visualize them you can use this code:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image # for saving as PNG/JPG
# --- Step 1: Load the saved frame stack ---
frame = np.load("frame_00001.npy")  # replace with your filename or frame number
print("Shape:", frame.shape)
print("Dtype:", frame.dtype)

# --- Step 2: Show the stack as-is (three channels are drawn as RGB) ---
plt.imshow(frame)
plt.axis("off")
plt.show()

# --- Step 3: Grayscale view ---
# For a 3-channel stack, channel 0 is the first captured frame of the
# group; any other array is shown unchanged.
gray_frame = frame[..., 0] if frame.ndim == 3 and frame.shape[2] == 3 else frame
plt.imshow(gray_frame, cmap="gray")
plt.axis("off")
plt.show()

# --- Step 4: Write both views out as image files ---
Image.fromarray(frame).save("frame_rgb.png")  # colour (stacked) version
Image.fromarray(gray_frame).save("frame_gray.png")  # grayscale version
So if anyone knows how to train a CNN on images, please teach me; I would be very grateful.