Moving around CSVs and better handling of duplicate data. Pulling all relevant data.
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -2,5 +2,4 @@
|
|||||||
.Trash-1000
|
.Trash-1000
|
||||||
target
|
target
|
||||||
database
|
database
|
||||||
data/stats/imported
|
data/__pycache__
|
||||||
data/__pycache__
|
|
||||||
|
|||||||
@@ -23,10 +23,15 @@ class Database:
|
|||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def selectall(self, query, values):
|
def selectall(self, query, values = None):
|
||||||
# Query the database for the specified index
|
# Query the database for the specified index
|
||||||
cursor = self.db.cursor()
|
cursor = self.db.cursor()
|
||||||
cursor.execute(query, values)
|
|
||||||
|
if values is None:
|
||||||
|
cursor.execute(query)
|
||||||
|
else:
|
||||||
|
cursor.execute(query, values)
|
||||||
|
|
||||||
result = cursor.fetchall()
|
result = cursor.fetchall()
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -253,8 +253,6 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
|
|||||||
print(f"Failed to get game data for date: {game_date}, number: {game_number}, park: {park_id}")
|
print(f"Failed to get game data for date: {game_date}, number: {game_number}, park: {park_id}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
print(curr_game)
|
|
||||||
|
|
||||||
select_teams = """
|
select_teams = """
|
||||||
SELECT
|
SELECT
|
||||||
win, team, home,
|
win, team, home,
|
||||||
@@ -400,6 +398,4 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
|
|||||||
]
|
]
|
||||||
training_data = [*training_data, *prev_game_data]
|
training_data = [*training_data, *prev_game_data]
|
||||||
|
|
||||||
print(f"{training_result}\n{training_data}")
|
|
||||||
|
|
||||||
return (training_result, training_data)
|
return (training_result, training_data)
|
||||||
|
|||||||
|
Can't render this file because it is too large.
|
|
Can't render this file because it is too large.
|
|
Can't render this file because it is too large.
|
2430
data/stats/gl2024.csv
Normal file
2430
data/stats/gl2024.csv
Normal file
File diff suppressed because it is too large
Load Diff
@@ -11,33 +11,17 @@ class Importer:
|
|||||||
def __init__(self, database: Database):
|
def __init__(self, database: Database):
|
||||||
self.database = database
|
self.database = database
|
||||||
|
|
||||||
def parse_all_data(self, source_dir, dest_dir):
|
def parse_all_data(self, source_dir):
|
||||||
# Ensure the destination directory exists
|
|
||||||
if not os.path.exists(dest_dir):
|
|
||||||
os.makedirs(dest_dir)
|
|
||||||
|
|
||||||
# List all files in the source and destination directories
|
# List all files in the source and destination directories
|
||||||
source_files = set(os.listdir(source_dir))
|
source_files = set(os.listdir(source_dir))
|
||||||
for filename in os.listdir(dest_dir):
|
|
||||||
if filename.endswith('.csv'):
|
|
||||||
source_files.discard(filename)
|
|
||||||
|
|
||||||
dest_files = set(os.listdir(dest_dir))
|
|
||||||
|
|
||||||
# Find files that are in the source but not in the destination
|
|
||||||
missing_files = source_files - dest_files
|
|
||||||
|
|
||||||
# Copy any missing CSV files from the source directory to the destination directory
|
# Copy any missing CSV files from the source directory to the destination directory
|
||||||
for filename in missing_files:
|
for filename in source_files:
|
||||||
src_file = os.path.join(source_dir, filename)
|
src_file = os.path.join(source_dir, filename)
|
||||||
dest_file = os.path.join(dest_dir, filename)
|
dest_file = os.path.join(dest_dir, filename)
|
||||||
|
|
||||||
if self.parse_one_file(f"{source_dir}/{filename}"):
|
if not self.parse_one_file(f"{source_dir}/{filename}"):
|
||||||
try:
|
print(f"Failed to parse {source_dir}/{filename}")
|
||||||
shutil.copy(src_file, dest_file)
|
|
||||||
print(f"Copied {filename} to {dest_dir}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Failed to copy {filename}: {e}")
|
|
||||||
|
|
||||||
def parse_one_file(self, filepath):
|
def parse_one_file(self, filepath):
|
||||||
bb_dict = {}
|
bb_dict = {}
|
||||||
@@ -61,6 +45,7 @@ class Importer:
|
|||||||
time.sleep(60*60*24)
|
time.sleep(60*60*24)
|
||||||
|
|
||||||
if not self.populate_database_with_stats(game):
|
if not self.populate_database_with_stats(game):
|
||||||
|
print(f"Failed to parse and populate {game}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|||||||
12
main.py
12
main.py
@@ -17,12 +17,14 @@ inputs = np.array([[0, 0, 1, 0],
|
|||||||
outputs = np.array([[0], [0], [0], [1], [1], [1]])
|
outputs = np.array([[0], [0], [0], [1], [1], [1]])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
#db_file = "./database/baseball.db"
|
db_file = "./database/baseball.db"
|
||||||
#db_conn = Database(db_file)
|
db_conn = Database(db_file)
|
||||||
|
|
||||||
#pull_training_data(db_conn, "20240602", 0, "BAL12")
|
query = "SELECT game_date, game_number, park_id FROM games ORDER BY game_date"
|
||||||
|
all_games = db_conn.selectall(query)
|
||||||
|
|
||||||
print(get_sun_and_moon_phase(39.283889, -76.621667, "20240602"))
|
for game in all_games:
|
||||||
|
game_result, training_data = pull_training_data(db_conn, str(game[0]), game[1], game[2])
|
||||||
|
|
||||||
"""
|
"""
|
||||||
build_db_path = "./data/sql/build_db.sql"
|
build_db_path = "./data/sql/build_db.sql"
|
||||||
@@ -34,7 +36,7 @@ if __name__ == '__main__':
|
|||||||
db_conn.run_sql_file(fill_teams_path)
|
db_conn.run_sql_file(fill_teams_path)
|
||||||
|
|
||||||
imp = Importer(db_conn)
|
imp = Importer(db_conn)
|
||||||
imp.parse_all_data("./data/stats/to_import", "./data/stats/imported/")
|
imp.parse_all_data("./data/stats/", "./data/stats/imported/")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user