From f59ae02734d7b149ed8e84b140a203cc511ca780 Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 8 Aug 2025 19:17:44 +0000 Subject: [PATCH] Updating file parsing --- data/stats_importer.py | 109 +++++++++++++++++++++++------------------ main.py | 56 ++------------------- 2 files changed, 64 insertions(+), 101 deletions(-) diff --git a/data/stats_importer.py b/data/stats_importer.py index 339fa99..e68d626 100644 --- a/data/stats_importer.py +++ b/data/stats_importer.py @@ -29,13 +29,12 @@ class Importer: src_file = os.path.join(source_dir, filename) dest_file = os.path.join(dest_dir, filename) - self.parse_one_file(f"{source_dir}/{filename}") - - try: - shutil.copy(src_file, dest_file) - print(f"Copied {filename} to {dest_dir}") - except Exception as e: - print(f"Failed to copy {filename}: {e}") + if self.parse_one_file(f"{source_dir}/{filename}"): + try: + shutil.copy(src_file, dest_file) + print(f"Copied {filename} to {dest_dir}") + except Exception as e: + print(f"Failed to copy {filename}: {e}") def parse_one_file(self, filepath): bb_dict = {} @@ -44,15 +43,18 @@ class Importer: bb_dict = list(reader) for game in bb_dict: - self.populate_database_with_stats(game) + if not self.populate_database_with_stats(game): + return False + + return True - def populate_database_with_stats(self, game_stats): + def populate_database_with_stats(self, game_stats) -> bool: parkid = game_stats["park-id"] park_data = self.database.select("SELECT latitude, longitude FROM parks WHERE park_id = ?", (game_stats["park-id"],)) if park_data is None: print(f"{parkid} is None") - return + return True insert_game = """ INSERT INTO games @@ -79,19 +81,6 @@ class Importer: ) """ - game_data = [ - game_stats["date"], game_stats["num-of-game"], game_stats["day-of-week"], - game_stats["length-in-outs"], game_stats["day-night"], game_stats["completion-info"], - game_stats["forfeit"], game_stats["protest"], game_stats["park-id"], - game_stats["attendance"], game_stats["length-in-min"], game_stats["home-plate-ump-id"], - game_stats["home-plate-ump-name"], game_stats["1b-plate-ump-id"], game_stats["1b-plate-ump-name"], - game_stats["2b-plate-ump-id"], game_stats["2b-plate-ump-name"], game_stats["3b-plate-ump-id"], - game_stats["3b-plate-ump-name"], game_stats["lf-plate-ump-id"], game_stats["lf-plate-ump-name"], - game_stats["rf-plate-ump-id"], game_stats["rf-plate-ump-name"], - ] - - game_id = self.database.insert(insert_game, game_data) - insert_team_game = """ INSERT INTO team_game ( @@ -145,6 +134,53 @@ class Importer: ) """ + insert_into_weather = """ + INSERT INTO weather + ( + game_id, temperature, humidity, + dew_point, apparent_temperature, air_pressure, + precipitation, rain, snowfall, + cloud_cover, wind_speed, wind_direction, + wind_gusts, sun_rise, sun_set, + moon_phase + ) + VALUES + ( + ?, ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ?, ?, + ?, ?, ?, + ? + ) + """ + + hour = 15 if game_stats["day-night"] == "D" else 19 + historic_weather = get_weather(park_data[0], park_data[1], game_stats["date"], hour) + + if "error" in historic_weather: + print(f"Error: {historic_weather['error']}: Details: {historic_weather['details']}") + return False + + if "hourly" not in historic_weather: + print(f"Failed to get weather: Full JSON: {historic_weather}") + return False + + historic_weather = historic_weather["hourly"] + + game_data = [ + game_stats["date"], game_stats["num-of-game"], game_stats["day-of-week"], + game_stats["length-in-outs"], game_stats["day-night"], game_stats["completion-info"], + game_stats["forfeit"], game_stats["protest"], game_stats["park-id"], + game_stats["attendance"], game_stats["length-in-min"], game_stats["home-plate-ump-id"], + game_stats["home-plate-ump-name"], game_stats["1b-plate-ump-id"], game_stats["1b-plate-ump-name"], + game_stats["2b-plate-ump-id"], game_stats["2b-plate-ump-name"], game_stats["3b-plate-ump-id"], + game_stats["3b-plate-ump-name"], game_stats["lf-plate-ump-id"], game_stats["lf-plate-ump-name"], + game_stats["rf-plate-ump-id"], game_stats["rf-plate-ump-name"], + ] + + game_id = self.database.insert(insert_game, game_data) + visiting_team_data = [ game_id, game_stats["visiting-team"], game_stats["visiting-game-num"], game_stats["visiting-score"], game_stats["visiting-line-scores"], game_stats["visiting-at-bats"], @@ -198,31 +234,6 @@ class Importer: self.database.insert(insert_team_game, visiting_team_data) self.database.insert(insert_team_game, home_team_data) - hour = 15 if game_stats["day-night"] == "D" else 19 - historic_weather = get_weather(park_data[0], park_data[1], game_stats["date"], hour) - historic_weather = historic_weather["hourly"] - - insert_into_weather = """ - INSERT INTO weather - ( - game_id, temperature, humidity, - dew_point, apparent_temperature, air_pressure, - precipitation, rain, snowfall, - cloud_cover, wind_speed, wind_direction, - wind_gusts, sun_rise, sun_set, - moon_phase - ) - VALUES - ( - ?, ?, ?, - ?, ?, ?, - ?, ?, ?, - ?, ?, ?, - ?, ?, ?, - ? - ) - """ - (sunrise_time, sunset_time, moonphase) = get_sun_and_moon_phase(park_data[0], park_data[1], game_stats["date"]) weather_data = [ @@ -235,3 +246,5 @@ class Importer: ] self.database.insert(insert_into_weather, weather_data) + + return True diff --git a/main.py b/main.py index 169fd2c..d627971 100644 --- a/main.py +++ b/main.py @@ -14,50 +14,6 @@ inputs = np.array([[0, 0, 1, 0], # output data outputs = np.array([[0], [0], [0], [1], [1], [1]]) -# create NeuralNetwork class -class NeuralNetwork: - - # intialize variables in class - def __init__(self, inputs, outputs): - self.inputs = inputs - self.outputs = outputs - # initialize weights as .50 for simplicity - self.weights = np.array([[.50], [.50], [.50], [0.50]]) - self.error_history = [] - self.epoch_list = [] - - #activation function ==> S(x) = 1/1+e^(-x) - def sigmoid(self, x, deriv=False): - if deriv == True: - return x * (1 - x) - return 1 / (1 + np.exp(-x)) - - # data will flow through the neural network. - def feed_forward(self): - self.hidden = self.sigmoid(np.dot(self.inputs, self.weights)) - - # going backwards through the network to update weights - def backpropagation(self): - self.error = self.outputs - self.hidden - delta = self.error * self.sigmoid(self.hidden, deriv=True) - self.weights += np.dot(self.inputs.T, delta) - - # train the neural net for 25,000 iterations - def train(self, epochs=25000): - for epoch in range(epochs): - # flow forward and produce an output - self.feed_forward() - # go back though the network to make corrections based on the output - self.backpropagation() - # keep track of the error history over each epoch - self.error_history.append(np.average(np.abs(self.error))) - self.epoch_list.append(epoch) - - # function to predict output on new and unseen input data - def predict(self, new_input): - prediction = self.sigmoid(np.dot(new_input, self.weights)) - return prediction - if __name__ == '__main__': build_db_path = "./data/sql/build_db.sql" fill_parks_path = "./data/sql/prefill_parks.sql" @@ -73,14 +29,7 @@ if __name__ == '__main__': imp = Importer(db_conn) imp.parse_all_data("./data/stats/to_import", "./data/stats/imported/") - #we = Weather() - #print(we.get_weather(39.26733000, -76.79831000, "20250706", 12)) - - #park_data = db_conn.select("SELECT latitude, longitude FROM parks WHERE park_id = ?", ("ATL03",)) - #print(get_sun_and_moon_phase(park_data[0], park_data[1], "20250709")) - #historic_weather = get_weather(park_data[0], park_data[1], game_stats["date"], hour) - - +""" else: # create neural network NN = NeuralNetwork(inputs, outputs) @@ -100,4 +49,5 @@ else: plt.plot(NN.epoch_list, NN.error_history) plt.xlabel('Epoch') plt.ylabel('Error') - plt.savefig('plot.png') \ No newline at end of file + plt.savefig('plot.png') +""" \ No newline at end of file