import csv
import os
import shutil

from data.db_connect import Database
from data.build_weather import get_weather, get_sun_and_moon_phase


def _build_insert_sql(table, columns):
    """Return a parameterized ``INSERT`` for *table* with one ``?`` per column.

    Deriving the placeholder list from *columns* keeps the two from drifting
    apart when a column is added or removed.
    """
    column_list = ", ".join(columns)
    placeholders = ", ".join("?" for _ in columns)
    return f"INSERT INTO {table} ({column_list}) VALUES ({placeholders})"


# (games column, CSV header) pairs, in insert order.
_GAME_FIELDS = (
    ("game_date", "date"),
    ("game_number", "num-of-game"),
    ("day_of_week", "day-of-week"),
    ("length_in_outs", "length-in-outs"),
    ("day_night", "day-night"),
    ("completion_info", "completion-info"),
    ("forfeit", "forfeit"),
    ("protest", "protest"),
    ("park_id", "park-id"),
    # The column name is spelled "attendence" in the INSERT this importer
    # has always issued; keep it so the statement still matches the table.
    ("attendence", "attendance"),
    ("length_in_minutes", "length-in-min"),
    ("home_plate_ump_id", "home-plate-ump-id"),
    ("home_plate_ump_name", "home-plate-ump-name"),
    ("b1_ump_id", "1b-plate-ump-id"),
    ("b1_ump_name", "1b-plate-ump-name"),
    ("b2_ump_id", "2b-plate-ump-id"),
    ("b2_ump_name", "2b-plate-ump-name"),
    ("b3_ump_id", "3b-plate-ump-id"),
    ("b3_ump_name", "3b-plate-ump-name"),
    ("lf_ump_id", "lf-plate-ump-id"),
    ("lf_ump_name", "lf-plate-ump-name"),
    ("rf_ump_id", "rf-plate-ump-id"),
    ("rf_ump_name", "rf-plate-ump-name"),
)

# (team_game column, CSV header suffix) pairs, in insert order.  The CSV
# header is "<side>-<suffix>" where <side> is "visiting" or "home".
_TEAM_FIELDS = (
    ("team", "team"),
    ("game_num", "game-num"),
    ("score", "score"),
    ("line_score", "line-scores"),
    ("at_bats", "at-bats"),
    ("hits", "hits"),
    ("doubles", "doubles"),
    ("triples", "triples"),
    ("homeruns", "homeruns"),
    ("rbis", "rbi"),
    ("sacrifice_hits", "sacrifice-hits"),
    ("sacrifice_flies", "sacrifice-flies"),
    ("hit_by_pitch", "hit-by-pitch"),
    ("walks", "walks"),
    ("intentional_walks", "intentional-walks"),
    ("strikeouts", "strikeouts"),
    ("stolen_bases", "stolen-bases"),
    ("caught_stealing", "caught-stealing"),
    ("grounded_double", "grounded-double"),
    ("interference", "interference"),
    ("left_on_base", "left-on-base"),
    ("pitchers_used", "pitchers-used"),
    ("individual_earned_runs", "individual-earned-runs"),
    ("earned_runs", "team-earned-runs"),
    ("wild_pitches", "wild-pitches"),
    ("balks", "balks"),
    ("putouts", "putouts"),
    ("assists", "assists"),
    ("errors", "errors"),
    ("passed", "passed"),
    ("double_play", "double-play"),
    ("triple_play", "triple-play"),
    ("manager_id", "manager-id"),
    ("manager_name", "manager-name"),
    ("starting_pitcher_id", "start-pitcher-id"),
    ("starting_pitcher_name", "start-pitcher-name"),
) + tuple(
    # Nine starting-lineup slots, each with id / name / position.
    (f"starting_{slot}_{part}", f"starting-{slot}-{part}")
    for slot in range(1, 10)
    for part in ("id", "name", "position")
)

# (weather column, key inside the "hourly" section of the weather response).
_WEATHER_FIELDS = (
    ("temperature", "temperature_2m"),
    ("humidity", "relative_humidity_2m"),
    ("dew_point", "dew_point_2m"),
    ("apparent_temperature", "apparent_temperature"),
    ("air_pressure", "pressure_msl"),
    ("precipitation", "precipitation"),
    ("rain", "rain"),
    ("snowfall", "snowfall"),
    ("cloud_cover", "cloud_cover"),
    ("wind_speed", "wind_speed_10m"),
    ("wind_direction", "wind_direction_10m"),
    ("wind_gusts", "wind_gusts_10m"),
)

_INSERT_GAME_SQL = _build_insert_sql(
    "games", [column for column, _ in _GAME_FIELDS]
)
_INSERT_TEAM_GAME_SQL = _build_insert_sql(
    "team_game", ["game"] + [column for column, _ in _TEAM_FIELDS]
)
_INSERT_WEATHER_SQL = _build_insert_sql(
    "weather",
    ["game_id"]
    + [column for column, _ in _WEATHER_FIELDS]
    + ["sun_rise", "sun_set", "moon_phase"],
)


def _team_stats(game_stats, side):
    """Return one team's values from *game_stats* in ``_TEAM_FIELDS`` order.

    Args:
        game_stats: Mapping of CSV header to value for a single game.
        side: Header prefix, ``"visiting"`` or ``"home"``.

    Raises:
        KeyError: If an expected ``"<side>-<suffix>"`` header is missing.
    """
    return [game_stats[f"{side}-{suffix}"] for _, suffix in _TEAM_FIELDS]


class Importer:
    """Load game CSV files into the database, enriched with weather data."""

    def __init__(self, database: Database):
        self.database = database

    def parse_all_data(self, source_dir, dest_dir) -> None:
        """Import every file in *source_dir* that is not yet in *dest_dir*.

        A file is copied to *dest_dir* only after ``parse_one_file`` reports
        success, so the destination directory doubles as the record of what
        has already been imported.

        NOTE(review): if a file fails part-way through, or the copy fails,
        the file is retried on the next run and the games that were already
        inserted will be inserted again.

        Args:
            source_dir: Directory containing the files to import.
            dest_dir: Directory holding already-imported files; created if
                it does not exist.
        """
        os.makedirs(dest_dir, exist_ok=True)

        already_imported = set(os.listdir(dest_dir))
        # Sorted so files are processed in a reproducible order rather than
        # in arbitrary set-iteration order.
        pending = sorted(set(os.listdir(source_dir)) - already_imported)

        for filename in pending:
            src_file = os.path.join(source_dir, filename)
            # Sub-directories (and other non-regular entries) cannot be
            # opened as CSV; skip them instead of crashing the whole run.
            if not os.path.isfile(src_file):
                continue
            if not self.parse_one_file(src_file):
                continue

            dest_file = os.path.join(dest_dir, filename)
            try:
                shutil.copy(src_file, dest_file)
            except OSError as e:
                print(f"Failed to copy {filename}: {e}")
            else:
                print(f"Copied {filename} to {dest_dir}")

    def parse_one_file(self, filepath) -> bool:
        """Import every game row of the CSV file at *filepath*.

        Args:
            filepath: Path of a CSV file whose first row is the header.

        Returns:
            ``True`` if every row was handled successfully (including an
            empty file); ``False`` as soon as one row fails, in which case
            the remaining rows are not processed.
        """
        # newline="" lets the csv module do its own newline handling, which
        # is required for quoted fields that contain line breaks.
        with open(filepath, "r", newline="") as bb_data_file:
            # Read the whole file first so a malformed CSV is detected
            # before anything is written to the database.
            games = list(csv.DictReader(bb_data_file))

        return all(self.populate_database_with_stats(game) for game in games)

    def populate_database_with_stats(self, game_stats) -> bool:
        """Insert one game, both team lines and its weather into the database.

        Every value is gathered before the first insert, so a missing CSV
        column or a failing weather / sun-moon lookup cannot leave a game
        row without its team or weather rows.

        Args:
            game_stats: Mapping of CSV header to value for a single game.

        Returns:
            ``True`` if the game was stored, or if it was skipped because
            its park is not in the ``parks`` table; ``False`` if the weather
            lookup reported an error or returned no ``"hourly"`` section.

        Raises:
            KeyError: If *game_stats* or the hourly weather data lacks an
                expected key.
        """
        park_id = game_stats["park-id"]
        park_data = self.database.select(
            "SELECT latitude, longitude FROM parks WHERE park_id = ?",
            (park_id,),
        )
        if park_data is None:
            # Unknown park: the game is skipped but not treated as a failure.
            print(f"{park_id} is None")
            return True

        latitude, longitude = park_data[0], park_data[1]
        game_date = game_stats["date"]
        # Day games are sampled at hour 15, all others at hour 19.
        hour = 15 if game_stats["day-night"] == "D" else 19

        historic_weather = get_weather(latitude, longitude, game_date, hour)
        if "error" in historic_weather:
            print(f"Error: {historic_weather['error']}: Details: {historic_weather['details']}")
            return False
        if "hourly" not in historic_weather:
            print(f"Failed to get weather: Full JSON: {historic_weather}")
            return False
        hourly = historic_weather["hourly"]

        # Collect everything that can fail before touching the database.
        game_row = [game_stats[key] for _, key in _GAME_FIELDS]
        visiting_stats = _team_stats(game_stats, "visiting")
        home_stats = _team_stats(game_stats, "home")
        weather_readings = [hourly[key][hour] for _, key in _WEATHER_FIELDS]
        sunrise_time, sunset_time, moonphase = get_sun_and_moon_phase(
            latitude, longitude, game_date
        )

        # The value returned for the game insert keys the dependent rows.
        game_id = self.database.insert(_INSERT_GAME_SQL, game_row)
        self.database.insert(_INSERT_TEAM_GAME_SQL, [game_id] + visiting_stats)
        self.database.insert(_INSERT_TEAM_GAME_SQL, [game_id] + home_stats)
        self.database.insert(
            _INSERT_WEATHER_SQL,
            [game_id] + weather_readings + [sunrise_time, sunset_time, moonphase],
        )
        return True