263 lines
13 KiB
Python
263 lines
13 KiB
Python
import os
|
|
import csv
|
|
import time
|
|
import shutil
|
|
|
|
from math import *
|
|
from data.db_connect import Database
|
|
from data.build_weather import get_weather, get_sun_and_moon_phase
|
|
|
|
class Importer:
    """Import baseball game-log CSV rows into the database, one game per row.

    Each row produces one ``games`` record, two ``team_game`` records
    (visiting and home) and, when hourly weather could be fetched for the
    park, one ``weather`` record.

    The ``*_FIELDS`` tables below pair every database column with the CSV
    key (or weather-response key) it is filled from, so the column list and
    the value list of each INSERT cannot drift apart.
    """

    # (row interval, seconds to sleep, message). Checked longest-first so
    # that only the longest applicable pause is taken; the shorter waits
    # are contained in it.
    _RATE_LIMIT_PAUSES = (
        (10000, 60 * 60 * 24, "Sleeping for 1 day"),
        (5000, 60 * 60, "Sleeping for 1 hour"),
        (600, 60, "Sleeping for 1 min"),
    )

    # games column -> CSV key. "attendence" is spelled as in the schema
    # this code has always written to; do not "fix" it here alone.
    _GAME_FIELDS = (
        ("game_date", "date"),
        ("game_number", "num-of-game"),
        ("day_of_week", "day-of-week"),
        ("length_in_outs", "length-in-outs"),
        ("day_night", "day-night"),
        ("completion_info", "completion-info"),
        ("forfeit", "forfeit"),
        ("protest", "protest"),
        ("park_id", "park-id"),
        ("attendence", "attendance"),
        ("length_in_minutes", "length-in-min"),
        ("home_plate_ump_id", "home-plate-ump-id"),
        ("home_plate_ump_name", "home-plate-ump-name"),
        ("b1_ump_id", "1b-plate-ump-id"),
        ("b1_ump_name", "1b-plate-ump-name"),
        ("b2_ump_id", "2b-plate-ump-id"),
        ("b2_ump_name", "2b-plate-ump-name"),
        ("b3_ump_id", "3b-plate-ump-id"),
        ("b3_ump_name", "3b-plate-ump-name"),
        ("lf_ump_id", "lf-plate-ump-id"),
        ("lf_ump_name", "lf-plate-ump-name"),
        ("rf_ump_id", "rf-plate-ump-id"),
        ("rf_ump_name", "rf-plate-ump-name"),
    )

    # team_game column -> CSV key suffix (the key is "<side>-<suffix>" with
    # side being "visiting" or "home"). These come right after the game id.
    _TEAM_LEADING_FIELDS = (
        ("team", "team"),
        ("game_num", "game-num"),
        ("score", "score"),
        ("line_score", "line-scores"),
    )

    # team_game columns that follow the computed "win" and "home" flags.
    _TEAM_STAT_FIELDS = (
        ("at_bats", "at-bats"),
        ("hits", "hits"),
        ("doubles", "doubles"),
        ("triples", "triples"),
        ("homeruns", "homeruns"),
        ("rbis", "rbi"),
        ("sacrifice_hits", "sacrifice-hits"),
        ("sacrifice_flies", "sacrifice-flies"),
        ("hit_by_pitch", "hit-by-pitch"),
        ("walks", "walks"),
        ("intentional_walks", "intentional-walks"),
        ("strikeouts", "strikeouts"),
        ("stolen_bases", "stolen-bases"),
        ("caught_stealing", "caught-stealing"),
        ("grounded_double", "grounded-double"),
        ("interference", "interference"),
        ("left_on_base", "left-on-base"),
        ("pitchers_used", "pitchers-used"),
        ("individual_earned_runs", "individual-earned-runs"),
        ("earned_runs", "team-earned-runs"),
        ("wild_pitches", "wild-pitches"),
        ("balks", "balks"),
        ("putouts", "putouts"),
        ("assists", "assists"),
        ("errors", "errors"),
        ("passed", "passed"),
        ("double_play", "double-play"),
        ("triple_play", "triple-play"),
        ("manager_id", "manager-id"),
        ("manager_name", "manager-name"),
        ("starting_pitcher_id", "start-pitcher-id"),
        ("starting_pitcher_name", "start-pitcher-name"),
    ) + tuple(
        # Nine starters, each contributing id / name / position in order.
        (f"starting_{slot}_{part}", f"starting-{slot}-{part}")
        for slot in range(1, 10)
        for part in ("id", "name", "position")
    )

    # weather column -> key inside the weather response's "hourly" mapping.
    _WEATHER_FIELDS = (
        ("temperature", "temperature_2m"),
        ("humidity", "relative_humidity_2m"),
        ("dew_point", "dew_point_2m"),
        ("apparent_temperature", "apparent_temperature"),
        ("air_pressure", "pressure_msl"),
        ("precipitation", "precipitation"),
        ("rain", "rain"),
        ("snowfall", "snowfall"),
        ("cloud_cover", "cloud_cover"),
        ("wind_speed", "wind_speed_10m"),
        ("wind_direction", "wind_direction_10m"),
        ("wind_gusts", "wind_gusts_10m"),
    )

    def __init__(self, database: "Database"):
        """Keep the database handle used for every select and insert."""
        self.database = database

    def parse_all_data(self, source_dir):
        """Import every regular file found directly inside ``source_dir``.

        Files are processed in sorted name order. A file whose import
        fails is reported on stdout and the remaining files are still
        processed. Sub-directories are skipped.
        """
        for filename in sorted(os.listdir(source_dir)):
            filepath = os.path.join(source_dir, filename)
            if not os.path.isfile(filepath):
                continue
            if not self.parse_one_file(filepath):
                print(f"Failed to parse {filepath}")

    def parse_one_file(self, filepath):
        """Import every row of one CSV file.

        Returns True when all rows were handled, False as soon as one row
        fails (later rows of that file are not attempted).
        """
        # Read everything up front so the file is not held open during the
        # (possibly very long) rate-limit pauses below. newline="" lets the
        # csv module handle line endings inside quoted fields itself.
        with open(filepath, "r", newline="") as bb_data_file:
            games = list(csv.DictReader(bb_data_file))

        for count, game in enumerate(games, start=1):
            print(f"Current line {count}")
            # Delay to not overwhelm the free api
            self._pause_for_rate_limit(count)

            if not self.populate_database_with_stats(game):
                print(f"Failed to parse and populate {game}")
                return False

        return True

    def populate_database_with_stats(self, game_stats) -> bool:
        """Insert one game, its two team rows and (if available) its weather.

        ``game_stats`` is one CSV row as a mapping of column name to value.

        Returns True when the row was stored or deliberately skipped (park
        not found, or game already present) and False when the weather
        lookup reported an error other than "No weather data available".
        """
        park_id = game_stats["park-id"]
        park_data = self.database.select(
            "SELECT latitude, longitude FROM parks WHERE park_id = ?",
            (park_id,),
        )
        if park_data is None:
            print(f"{park_id} is None")
            return True

        check_game_added_query = (
            "SELECT id FROM games"
            " WHERE game_date = ? AND game_number = ? AND park_id = ?"
        )
        check_game_added_data = [
            game_stats["date"], game_stats["num-of-game"], park_id,
        ]
        if self.database.select(check_game_added_query, check_game_added_data) is not None:
            return True

        # Build every row before the first write, so a malformed CSV row
        # fails here instead of leaving a game without its team rows.
        game_values = [game_stats[key] for _, key in self._GAME_FIELDS]
        visiting_score = int(game_stats["visiting-score"])
        home_score = int(game_stats["home-score"])
        team_rows = (
            self._team_values(game_stats, "visiting", visiting_score > home_score, 0),
            self._team_values(game_stats, "home", home_score > visiting_score, 1),
        )

        # Day games are sampled at 15:00 and everything else at 19:00.
        hour = 15 if game_stats["day-night"] == "D" else 19
        response = get_weather(park_data[0], park_data[1], game_stats["date"], hour)
        usable, hourly = self._extract_hourly(response)
        if not usable:
            return False

        weather_values = None
        if hourly is not None:
            (sunrise_time, sunset_time, moonphase) = get_sun_and_moon_phase(
                park_data[0], park_data[1], game_stats["date"]
            )
            weather_values = [hourly[key][hour] for _, key in self._WEATHER_FIELDS]
            weather_values += [sunrise_time, sunset_time, moonphase]

        game_columns = [column for column, _ in self._GAME_FIELDS]
        game_id = self.database.insert(
            self._insert_sql("games", game_columns), game_values
        )

        insert_team_game = self._insert_sql("team_game", self._team_columns())
        for team_values in team_rows:
            self.database.insert(insert_team_game, [game_id] + team_values)

        if weather_values is not None:
            weather_columns = (
                ["game_id"]
                + [column for column, _ in self._WEATHER_FIELDS]
                + ["sun_rise", "sun_set", "moon_phase"]
            )
            self.database.insert(
                self._insert_sql("weather", weather_columns),
                [game_id] + weather_values,
            )

        return True

    def _pause_for_rate_limit(self, count):
        """Sleep when ``count`` hits a rate-limit interval (longest one only)."""
        for every, seconds, message in self._RATE_LIMIT_PAUSES:
            if count % every == 0:
                print(message)
                time.sleep(seconds)
                return

    @staticmethod
    def _insert_sql(table, columns):
        """Build a parameterised INSERT with one ``?`` per column.

        Only called with the constant table and column names defined on
        this class, never with external input.
        """
        column_list = ", ".join(columns)
        placeholders = ", ".join("?" for _ in columns)
        return f"INSERT INTO {table} ({column_list}) VALUES ({placeholders})"

    @classmethod
    def _team_columns(cls):
        """Return the team_game column names in insert order."""
        return (
            ["game"]
            + [column for column, _ in cls._TEAM_LEADING_FIELDS]
            + ["win", "home"]
            + [column for column, _ in cls._TEAM_STAT_FIELDS]
        )

    @classmethod
    def _team_values(cls, game_stats, side, won, is_home):
        """Return one side's team_game values, without the leading game id.

        ``side`` is the CSV key prefix ("visiting" or "home"); ``won`` and
        ``is_home`` fill the "win" and "home" columns.
        """
        values = [game_stats[f"{side}-{key}"] for _, key in cls._TEAM_LEADING_FIELDS]
        values += [won, is_home]
        values += [game_stats[f"{side}-{key}"] for _, key in cls._TEAM_STAT_FIELDS]
        return values

    @staticmethod
    def _extract_hourly(response):
        """Classify a weather response as ``(usable, hourly)``.

        * error mentioning "No weather data available": ``(True, None)``,
          the game is stored without weather;
        * any other error: ``(False, None)``, the row must be reported as
          failed;
        * no "hourly" section: ``(True, None)``;
        * otherwise: ``(True, response["hourly"])``.
        """
        if "error" in response:
            # An error response may lack "details"; treat that as "unknown
            # error" rather than crashing with a KeyError.
            details = response.get("details", "")
            print(f"Error: {response['error']}: Details: {details}")
            if "No weather data available" in str(details):
                return True, None
            return False, None

        if "hourly" not in response:
            print(f"Failed to get weather: Full JSON: {response}")
            return True, None

        return True, response["hourly"]
|