Initial setup for training

Commit e6b8a9f213 (parent 2db9953601), 2025-09-19 21:50:46 +00:00
5 changed files with 901 additions and 222 deletions

Pipfile

@@ -8,8 +8,10 @@ ephem = "*"
matplotlib = "*"
numpy = "*"
requests = "*"
rich = "*"
suntime = "*"
timezonefinder = "*"
tensorflow = "*"
[dev-packages]

Pipfile.lock (generated): 983 changed lines
File diff suppressed because it is too large.

data/get_data.py

@@ -1,5 +1,8 @@
from data.db_connect import Database
import hashlib
import numpy as np
# Game date in YYYYmmDD
def pull_training_data(database: Database, game_date: str, game_number: int, park_id: str):
# Training data
@@ -278,7 +281,7 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
print(f"Got the wrong number of games {len(curr_team_game)}")
return None
training_result = (curr_team_game[0][0] == 1 and curr_team_game[0][2] == 1)
training_result = int((curr_team_game[0][0] == 1 and curr_team_game[0][2] == 1))
select_previous_games = """
SELECT
@@ -349,10 +352,10 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
select_previous_games_data_1 = [first_of_the_year, game_date, curr_team_game[1][1]]
training_data = [*curr_game[1:]]
training_data = [*training_data, *curr_team_game[0][1:]]
prev_games = database.selectall(select_previous_games, select_previous_games_data_0)
prev_game_data = [0] * 61
prev_game_data = [0] * 85
if prev_games is not None:
prev_win_streak = 0
index = len(prev_games) - 1
@@ -372,11 +375,12 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
prev_loss_streak,
*prev_games[index][1:]
]
training_data = [*training_data, *prev_game_data]
training_data = [*training_data, *curr_team_game[1][1:]]
prev_games = database.selectall(select_previous_games, select_previous_games_data_1)
prev_game_data = [0] * 61
prev_game_data = [0] * 85
if prev_games is not None:
prev_win_streak = 0
index = len(prev_games) - 1
@@ -396,6 +400,30 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
prev_loss_streak,
*prev_games[index][1:]
]
training_data = [*training_data, *prev_game_data]
return (training_result, training_data)
def convert_to_integer(value):
    if isinstance(value, int):
        return value
    elif isinstance(value, str):
        return int(
            hashlib.blake2b(
                data=value.encode(),
                digest_size=3,
            ).hexdigest(),
            16
        )
    elif isinstance(value, float):
        # Times 100 should be unique enough
        return int(value * 100)
    elif isinstance(value, bool):
        return int(value)
    else:
        return -1000
def normalize_training_data(data: list):
    new_data = np.array([convert_to_integer(val) for val in data], dtype=np.int64)
    return new_data
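
As a quick illustration (not part of the commit), this is how the two helpers above behave on a few representative values, assuming they are importable as data.get_data.convert_to_integer and data.get_data.normalize_training_data; the literal inputs are made up for the example:

from data.get_data import convert_to_integer, normalize_training_data

# Ints pass through; floats are scaled by 100 and truncated toward zero.
print(convert_to_integer(3))          # 3
print(convert_to_integer(7.25))       # 725
# bool is a subclass of int, so True takes the int branch and is returned as-is;
# anything unhandled (e.g. None) falls through to the -1000 sentinel.
print(convert_to_integer(True), convert_to_integer(None))    # True -1000
# Strings are reduced to a 3-byte blake2b digest read as a hex integer,
# so a mixed-type feature row collapses into a single int64 vector.
print(normalize_training_data([3, 7.25, "NYA"]))             # array of three int64 values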

main.py: 56 changed lines

@@ -1,11 +1,16 @@
import numpy as np # helps with the math
import matplotlib.pyplot as plt # to plot error during training
import tensorflow as tf
from data.get_data import pull_training_data
from rich.progress import Progress
from data.get_data import pull_training_data, normalize_training_data
from data.db_connect import Database
from data.build_weather import get_weather, get_sun_and_moon_phase
from data.stats_importer import Importer
from neuralnet.neuralnetwork import NeuralNetwork
# input data
inputs = np.array([[0, 0, 1, 0],
[0, 0, 1, 1],
@@ -23,13 +28,54 @@ if __name__ == '__main__':
query = "SELECT game_date, game_number, park_id FROM games ORDER BY game_date"
all_games = db_conn.selectall(query)
for game in all_games:
game_result, training_data = pull_training_data(db_conn, str(game[0]), game[1], game[2])
compiled_training_data = None
compiled_training_results = None
with Progress() as p_bar:
p_bar_val = p_bar.add_task("Getting Data ...", total=len(all_games))
for game in all_games:
p_bar.update(p_bar_val, advance=1)
game_result, training_data = pull_training_data(db_conn, str(game[0]), game[1], game[2])
training_data = normalize_training_data(training_data)
if compiled_training_results is None:
compiled_training_data = np.array([training_data])
compiled_training_results = np.array([game_result])
else:
compiled_training_data = np.append(compiled_training_data, np.array([training_data]), axis=0)
compiled_training_results = np.append(compiled_training_results, np.array([game_result]), axis=0)
ctd_len = len(compiled_training_data)
ctr_len = len(compiled_training_results)
td = compiled_training_data[:ctd_len-100]
tdt = compiled_training_data[ctd_len-100:]
tr = compiled_training_results[:ctr_len-100]
trt = compiled_training_results[ctr_len-100:]
nn = NeuralNetwork(len(compiled_training_data[0]))
nn.train(td, tr)
nn.summary()
predictions = nn.predict(np.array(tdt))
total_num = 0
accurate_num = 0
for pred, act in zip(predictions, trt):
total_num += 1
pred = round(pred[0] * 1000) / 1000.0
guess = round(pred)
print(f"Pred: {pred} -> Gue: {guess} -> Res: {act} -> Cor: {guess == act}")
accurate_num += int(guess==act)
print(f"Total: {total_num} -> Accu: {accurate_num} -> Perc: {accurate_num/total_num}")
#for index in range(len(trt)):
# print(f"pred: {nn.predict(np.array([tdt[index]]))} : act : {trt[index]}")
"""
build_db_path = "./data/sql/build_db.sql"
build_db_path = "./data/sql/build_db .sql"
fill_parks_path = "./data/sql/prefill_parks.sql"
fill_teams_path = "./data/sql/prefill_teams.sql"
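
A side note on the data-gathering loop above (illustrative, not part of the commit): np.append copies the whole array on every iteration, and pull_training_data can return None for a malformed game, which the loop as written would not survive. A sketch of an equivalent accumulation that collects rows in plain lists and stacks them once, reusing the names already defined in main.py:

rows, labels = [], []
for game in all_games:
    result = pull_training_data(db_conn, str(game[0]), game[1], game[2])
    if result is None:          # skip games the queries could not resolve
        continue
    game_result, training_data = result
    rows.append(normalize_training_data(training_data))
    labels.append(game_result)
compiled_training_data = np.stack(rows)        # shape (n_games, n_features), dtype int64
compiled_training_results = np.array(labels)   # shape (n_games,)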

neuralnet/neuralnetwork.py

@@ -1,26 +1,40 @@
import os
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf
# create NeuralNetwork class
class NeuralNetwork:
def __init__(self, inputs_len: int):
def __init__(self, input_length: int):
self.model = tf.keras.Sequential([
tf.keras.Input(shape=(input_length,), dtype=tf.int64),
tf.keras.layers.Dense(512, activation='relu'),
tf.keras.layers.Dropout(0.5),
tf.keras.layers.Dense(256, activation='relu'),
tf.keras.layers.Dropout(0.5),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dense(1, activation='sigmoid'),
])
self.model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Setup checkpoint
self.checkpoint_path = "./training/cp.ckpt"
self.cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, verbose=1)
self.checkpoint_path = "./training/cp.ckpt.weights.h5"
self.cp_callback = tf.keras.callbacks.ModelCheckpoint(
filepath=self.checkpoint_path,
save_weights_only=True,
save_best_only=True,
monitor='loss',
mode='min',
)
# Setup model
self.model = Sequential()
self.model.add(Dense(12, input_shape=(inputs_len,), activation='relu'))
self.model.add(Dense(8, activation='relu'))
self.model.add(Dense(1, activation='sigmoid'))
self.model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#if os.path.isfile(self.checkpoint_path):
# self.model.load_weights(self.checkpoint_path)
if os.path.isfile(self.checkpoint_path):
self.model.load_weights(self.checkpoint_path)
def train(self, inputs :list, outputs :list):
self.model.fit(inputs, outputs, epochs=100, batch_size=64, callbacks=[self.cp_callback])
def train(inputs :list, outputs :list):
self.model.fit(inputs, outputs, epochs=150, batch_size=10, callbacks=[self.cp_callback])
def summary(self):
print(self.model.summary())
def predict(self, new_input):
return self.model.predict(new_input)
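
For completeness, a minimal usage sketch of the class above, assuming main.py has already built the td/tr/tdt arrays and that a checkpoint may exist at ./training/cp.ckpt.weights.h5 (illustrative, not committed code):

from neuralnet.neuralnetwork import NeuralNetwork

nn = NeuralNetwork(input_length=td.shape[1])   # __init__ reloads saved weights if the file exists
nn.train(td, tr)                               # fit; the callback keeps the weights with the best training loss
probs = nn.predict(tdt)                        # sigmoid outputs, shape (n_samples, 1), in [0, 1]
wins = (probs[:, 0] >= 0.5).astype(int)        # threshold to predicted win (1) / loss (0)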