Initial setup for training

This commit is contained in:
2025-09-19 21:50:46 +00:00
parent 2db9953601
commit e6b8a9f213
5 changed files with 901 additions and 222 deletions

View File

@@ -1,5 +1,8 @@
from data.db_connect import Database
import hashlib
import numpy as np
# game_date is a string in YYYYMMDD format
def pull_training_data(database: Database, game_date: str, game_number: int, park_id: str):
# Training data
@@ -278,7 +281,7 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
print(f"Got the wrong number of games {len(curr_team_game)}")
return None
training_result = (curr_team_game[0][0] == 1 and curr_team_game[0][2] == 1)
training_result = int((curr_team_game[0][0] == 1 and curr_team_game[0][2] == 1))
select_previous_games = """
SELECT
@@ -349,10 +352,10 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
select_previous_games_data_1 = [first_of_the_year, game_date, curr_team_game[1][1]]
training_data = [*curr_game[1:]]
training_data = [*training_data, *curr_team_game[0][1:]]
prev_games = database.selectall(select_previous_games, select_previous_games_data_0)
prev_game_data = [0] * 61
prev_game_data = [0] * 85
if prev_games is not None:
prev_win_streak = 0
index = len(prev_games) - 1
@@ -372,11 +375,12 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
prev_loss_streak,
*prev_games[index][1:]
]
training_data = [*training_data, *prev_game_data]
training_data = [*training_data, *curr_team_game[1][1:]]
prev_games = database.selectall(select_previous_games, select_previous_games_data_1)
prev_game_data = [0] * 61
prev_game_data = [0] * 85
if prev_games is not None:
prev_win_streak = 0
index = len(prev_games) - 1
@@ -396,6 +400,30 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
prev_loss_streak,
*prev_games[index][1:]
]
training_data = [*training_data, *prev_game_data]
return (training_result, training_data)
def convert_to_integer(value):
    """Map a single raw feature value to a plain ``int``.

    Conversion rules, by type of ``value``:

    * ``bool``  -> ``0`` or ``1``.
    * ``int``   -> returned unchanged.
    * ``str``   -> the 3-byte BLAKE2b digest of the encoded text, read as a
      hexadecimal integer in ``[0, 2**24)``. Equal strings always map to the
      same number; distinct strings may collide.
    * ``float`` -> ``int(value * 100)``: two decimal places are kept and the
      remainder is truncated toward zero.
    * anything else (including ``None``) -> the sentinel ``-1000``.

    Raises:
        ValueError: if ``value`` is a NaN float.
        OverflowError: if ``value`` is an infinite float.
    """
    # bool is a subclass of int, so it must be tested before int; otherwise
    # True/False are returned as bool objects and this branch never runs.
    if isinstance(value, bool):
        return int(value)
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        # Pass the bytes positionally: blake2b's first parameter is
        # positional-only in many CPython releases, where ``data=`` raises
        # TypeError.
        digest = hashlib.blake2b(value.encode(), digest_size=3).hexdigest()
        return int(digest, 16)
    if isinstance(value, float):
        # Times 100 should be unique enough
        return int(value * 100)
    return -1000
def normalize_training_data(data: list):
    """Turn a row of raw feature values into an ``int64`` NumPy array.

    Every element of ``data`` is passed through ``convert_to_integer`` and
    the results are packed, in their original order, into the returned array.
    """
    encoded = list(map(convert_to_integer, data))
    return np.array(encoded, dtype=np.int64)