Compare commits

2 Commits

| Author | SHA1 | Date |
|---|---|---|
| | e6b8a9f213 | |
| | 2db9953601 | |
Pipfile (2 changes)

```diff
@@ -8,8 +8,10 @@ ephem = "*"
 matplotlib = "*"
 numpy = "*"
 requests = "*"
+rich = "*"
 suntime = "*"
 timezonefinder = "*"
+tensorflow = "*"
 
 [dev-packages]
 
```
Pipfile.lock (generated, 983 changes)

File diff suppressed because it is too large
data/get_data.py

```diff
@@ -1,5 +1,8 @@
 from data.db_connect import Database
 
+import hashlib
+import numpy as np
+
 # Game date in YYYYmmDD
 def pull_training_data(database: Database, game_date: str, game_number: int, park_id: str):
     # Training data
@@ -278,7 +281,7 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
         print(f"Got the wrong number of games {len(curr_team_game)}")
         return None
 
-    training_result = (curr_team_game[0][0] == 1 and curr_team_game[0][2] == 1)
+    training_result = int((curr_team_game[0][0] == 1 and curr_team_game[0][2] == 1))
 
     select_previous_games = """
         SELECT
@@ -349,10 +352,10 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
     select_previous_games_data_1 = [first_of_the_year, game_date, curr_team_game[1][1]]
 
     training_data = [*curr_game[1:]]
 
     training_data = [*training_data, *curr_team_game[0][1:]]
 
     prev_games = database.selectall(select_previous_games, select_previous_games_data_0)
-    prev_game_data = [0] * 61
+    prev_game_data = [0] * 85
     if prev_games is not None:
         prev_win_streak = 0
         index = len(prev_games) - 1
@@ -372,11 +375,12 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
             prev_loss_streak,
             *prev_games[index][1:]
         ]
 
     training_data = [*training_data, *prev_game_data]
 
     training_data = [*training_data, *curr_team_game[1][1:]]
 
     prev_games = database.selectall(select_previous_games, select_previous_games_data_1)
-    prev_game_data = [0] * 61
+    prev_game_data = [0] * 85
     if prev_games is not None:
         prev_win_streak = 0
         index = len(prev_games) - 1
@@ -396,6 +400,30 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
             prev_loss_streak,
             *prev_games[index][1:]
         ]
 
     training_data = [*training_data, *prev_game_data]
 
+    return (training_result, training_data)
+
+def convert_to_integer(value):
+    if isinstance(value, int):
+        return value
+    elif isinstance(value, str):
+        return int(
+            hashlib.blake2b(
+                data=value.encode(),
+                digest_size=3,
+            ).hexdigest(),
+            16
+        )
+    elif isinstance(value, float):
+        # Times 100 should be unique enough
+        return int(value * 100)
+    elif isinstance(value, bool):
+        return int(value)
+    else:
+        return -1000
+
+def normalize_training_data(data: list):
+    new_data = np.array([convert_to_integer(val) for val in data], dtype=np.int64)
+    return new_data
```
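The new `convert_to_integer` maps mixed-type fields onto plain integers: ints pass through, strings are hashed through a 3-byte BLAKE2b digest (so they land in `[0, 16**6)`), floats are scaled by 100, and anything unrecognized becomes the `-1000` sentinel. A minimal sketch of the resulting behavior; the input row here is made up for illustration:

```python
from data.get_data import normalize_training_data

# Hypothetical raw row of the kind pull_training_data assembles.
raw = ["NYA", 7, 182.5, True, None]

vec = normalize_training_data(raw)
print(vec.dtype)  # int64
# "NYA"  -> stable hash in [0, 16**6)
# 7      -> 7
# 182.5  -> 18250
# True   -> 1 (bools satisfy isinstance(value, int), so the bool branch never fires)
# None   -> -1000
print(vec)
```

One caveat: a 3-byte digest gives only about 16.7 million buckets, so distinct strings can collide; that is presumably acceptable here since the string values are short categorical IDs.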
main.py (56 changes)
```diff
@@ -1,11 +1,16 @@
 import numpy as np  # helps with the math
 import matplotlib.pyplot as plt  # to plot error during training
+import tensorflow as tf
 
-from data.get_data import pull_training_data
+from rich.progress import Progress
+
+from data.get_data import pull_training_data, normalize_training_data
 from data.db_connect import Database
 from data.build_weather import get_weather, get_sun_and_moon_phase
 from data.stats_importer import Importer
+
+from neuralnet.neuralnetwork import NeuralNetwork
 
 # input data
 inputs = np.array([[0, 0, 1, 0],
                    [0, 0, 1, 1],
@@ -23,11 +28,54 @@ if __name__ == '__main__':
     query = "SELECT game_date, game_number, park_id FROM games ORDER BY game_date"
     all_games = db_conn.selectall(query)
 
-    for game in all_games:
-        game_result, training_data = pull_training_data(db_conn, str(game[0]), game[1], game[2])
+    compiled_training_data = None
+    compiled_training_results = None
+
+    with Progress() as p_bar:
+        p_bar_val = p_bar.add_task("Getting Data ...", total=len(all_games))
+        for game in all_games:
+            p_bar.update(p_bar_val, advance=1)
+            game_result, training_data = pull_training_data(db_conn, str(game[0]), game[1], game[2])
+
+            training_data = normalize_training_data(training_data)
+
+            if compiled_training_results is None:
+                compiled_training_data = np.array([training_data])
+                compiled_training_results = np.array([game_result])
+            else:
+                compiled_training_data = np.append(compiled_training_data, np.array([training_data]), axis=0)
+                compiled_training_results = np.append(compiled_training_results, np.array([game_result]), axis=0)
+
+    ctd_len = len(compiled_training_data)
+    ctr_len = len(compiled_training_results)
+
+    td = compiled_training_data[:ctd_len-100]
+    tdt = compiled_training_data[ctd_len-100:]
+
+    tr = compiled_training_results[:ctr_len-100]
+    trt = compiled_training_results[ctr_len-100:]
+
+    nn = NeuralNetwork(len(compiled_training_data[0]))
+    nn.train(td, tr)
+    nn.summary()
+    predictions = nn.predict(np.array(tdt))
+
+    total_num = 0
+    accurate_num = 0
+    for pred, act in zip(predictions, trt):
+        total_num += 1
+        pred = round(pred[0] * 1000) / 1000.0
+        guess = round(pred)
+        print(f"Pred: {pred} -> Gue: {guess} -> Res: {act} -> Cor: {guess == act}")
+
+        accurate_num += int(guess==act)
+
+    print(f"Total: {total_num} -> Accu: {accurate_num} -> Perc: {accurate_num/total_num}")
+    #for index in range(len(trt)):
+    #    print(f"pred: {nn.predict(np.array([tdt[index]]))} : act : {trt[index]}")
 
     """
-    build_db_path = "./data/sql/build_db.sql"
+    build_db_path = "./data/sql/build_db .sql"
     fill_parks_path = "./data/sql/prefill_parks.sql"
     fill_teams_path = "./data/sql/prefill_teams.sql"
 
```
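A design note on the accumulation loop above: `np.append` copies the entire array on every call, so building the dataset this way is quadratic in the number of games, and the unpacking will also raise a TypeError whenever `pull_training_data` hits its `return None` path. A linear-time sketch under the same interfaces; the names match the diff, and `db_conn` / `all_games` are assumed to come from the surrounding script, while the `None` guard is an assumption about the desired behavior:

```python
import numpy as np
from data.get_data import pull_training_data, normalize_training_data

rows, results = [], []
for game in all_games:
    pulled = pull_training_data(db_conn, str(game[0]), game[1], game[2])
    if pulled is None:  # skip games the queries could not reconcile
        continue
    game_result, training_data = pulled
    rows.append(normalize_training_data(training_data))
    results.append(game_result)

# One allocation at the end instead of a copy per game.
compiled_training_data = np.stack(rows)        # shape (n_games, n_features)
compiled_training_results = np.array(results)  # shape (n_games,)
```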
neuralnet/nerulnetdata.py (new file, 160 lines)
```diff
@@ -0,0 +1,160 @@
+"""
+Data to use:
+    Previous Game 2
+        game_date
+        day_of_week
+        day_night
+        park
+        length_in_minutes
+        score
+        line_score
+        at_bats
+        hits
+        doubles
+        triples
+        homeruns
+        rbis
+        sacrifice_hits
+        sacrifice_flies
+        hit_by_pitch
+        walks
+        intentional_walks
+        strikeouts
+        stolen_bases
+        caught_stealing
+        grounded_double
+        interference
+        left_on_base
+        pitchers_used
+        individual_earned_runs
+        earned_runs
+        wild_pitches
+        assits
+        errors
+        double_play
+        triple_play
+        starting_1_id
+        starting_1_position
+        starting_2_id
+        starting_2_position
+        starting_3_id
+        starting_3_position
+        starting_4_id
+        starting_4_position
+        starting_5_id
+        starting_5_position
+        starting_6_id
+        starting_6_position
+        starting_7_id
+        starting_7_position
+        starting_8_id
+        starting_8_position
+        starting_9_id
+        starting_9_position
+
+    Previous Game 1
+        game_date
+        day_of_week
+        day_night
+        park
+        length_in_minutes
+        score
+        line_score
+        at_bats
+        hits
+        doubles
+        triples
+        homeruns
+        rbis
+        sacrifice_hits
+        sacrifice_flies
+        hit_by_pitch
+        walks
+        intentional_walks
+        strikeouts
+        stolen_bases
+        caught_stealing
+        grounded_double
+        interference
+        left_on_base
+        pitchers_used
+        individual_earned_runs
+        earned_runs
+        wild_pitches
+        assits
+        errors
+        double_play
+        triple_play
+        starting_1_id
+        starting_1_position
+        starting_2_id
+        starting_2_position
+        starting_3_id
+        starting_3_position
+        starting_4_id
+        starting_4_position
+        starting_5_id
+        starting_5_position
+        starting_6_id
+        starting_6_position
+        starting_7_id
+        starting_7_position
+        starting_8_id
+        starting_8_position
+        starting_9_id
+        starting_9_position
+
+    Predicted Game
+        game_date
+        day_of_week
+        day_night
+        park_id
+        home_plate_ump_id
+        b1_ump_id
+        b2_ump_id
+        b3_ump_id
+        lf_fence_distance
+        lf_fence_height
+        ct_fence_distance
+        ct_fence_height
+        rf_fence_distance
+        rf_fence_height
+        has_roof
+        latitude
+        longitude
+        elevation
+        starting_1_id
+        starting_1_position
+        starting_2_id
+        starting_2_position
+        starting_3_id
+        starting_3_position
+        starting_4_id
+        starting_4_position
+        starting_5_id
+        starting_5_position
+        starting_6_id
+        starting_6_position
+        starting_7_id
+        starting_7_position
+        starting_8_id
+        starting_8_position
+        starting_9_id
+        starting_9_position
+        temperature
+        humidity
+        dew_point
+        apparent_temperature
+        air_pressure
+        percipitation
+        rain
+        snowfall
+        cloud_cover
+        wind_speed
+        wind_direction
+        wind_gusts
+        sun_rise
+        sun_set
+        moon_phase
+
+"""
```
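The docstring reads as the intended layout of the flat input vector: every field of the two previous games, followed by the predicted game's fields. A hypothetical sketch of assembling a record in that order; the field lists are abbreviated and the dict-based access is illustrative, not the repo's actual API:

```python
# Abbreviated; the full lists follow the docstring above.
PREV_GAME_FIELDS = ["game_date", "day_of_week", "day_night", "park", "score"]
PREDICTED_GAME_FIELDS = ["game_date", "day_of_week", "day_night", "park_id", "temperature"]

def build_input_vector(prev_game_2: dict, prev_game_1: dict, predicted: dict) -> list:
    ordered = []
    for record, fields in ((prev_game_2, PREV_GAME_FIELDS),
                           (prev_game_1, PREV_GAME_FIELDS),
                           (predicted, PREDICTED_GAME_FIELDS)):
        # Missing fields become 0, matching the [0] * 85 padding in get_data.py.
        ordered.extend(record.get(field, 0) for field in fields)
    return ordered
```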
neuralnet/neuralnetwork.py (new file, 40 lines)
```diff
@@ -0,0 +1,40 @@
+import os
+import numpy as np
+import tensorflow as tf
+
+# create NeuralNetwork class
+class NeuralNetwork:
+    def __init__(self, input_length: int):
+        self.model = tf.keras.Sequential([
+            tf.keras.Input(shape=(input_length,), dtype=tf.int64),
+            tf.keras.layers.Dense(512, activation='relu'),
+            tf.keras.layers.Dropout(0.5),
+            tf.keras.layers.Dense(256, activation='relu'),
+            tf.keras.layers.Dropout(0.5),
+            tf.keras.layers.Dense(128, activation='relu'),
+            tf.keras.layers.Dense(1, activation='sigmoid'),
+        ])
+
+        self.model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+        # Setup checkpoint
+        self.checkpoint_path = "./training/cp.ckpt.weights.h5"
+        self.cp_callback = tf.keras.callbacks.ModelCheckpoint(
+            filepath=self.checkpoint_path,
+            save_weights_only=True,
+            save_best_only=True,
+            monitor='loss',
+            mode='min',
+        )
+
+        #if os.path.isfile(self.checkpoint_path):
+        #    self.model.load_weights(self.checkpoint_path)
+
+    def train(self, inputs :list, outputs :list):
+        self.model.fit(inputs, outputs, epochs=100, batch_size=64, callbacks=[self.cp_callback])
+
+    def summary(self):
+        print(self.model.summary())
+
+    def predict(self, new_input):
+        return self.model.predict(new_input)
```
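For reference, a minimal usage sketch of the class above; the feature count and the random data are placeholders, not values from the repo:

```python
import numpy as np
from neuralnet.neuralnetwork import NeuralNetwork

rng = np.random.default_rng(0)
X = rng.integers(0, 16**6, size=(500, 85), dtype=np.int64)  # placeholder feature matrix
y = rng.integers(0, 2, size=(500,))                         # 0/1 game results

nn = NeuralNetwork(input_length=X.shape[1])
nn.train(X[:-100], y[:-100])              # fits for 100 epochs, checkpointing best loss
probs = nn.predict(X[-100:])              # sigmoid outputs, shape (100, 1)
preds = (probs[:, 0] >= 0.5).astype(int)  # threshold at 0.5, as main.py's round() does
```

Since the raw features include hash values as large as 16**6, the unscaled int64 inputs hand the first Dense layer a very wide dynamic range; a `tf.keras.layers.Normalization` layer adapted on the training set is a common refinement here, though the source does not use one.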