Moving around CSVs and better handling of duplicate data. Pulling all relevant data.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,5 +2,4 @@
|
||||
.Trash-1000
|
||||
target
|
||||
database
|
||||
data/stats/imported
|
||||
data/__pycache__
|
||||
@@ -23,10 +23,15 @@ class Database:
|
||||
else:
|
||||
return None
|
||||
|
||||
def selectall(self, query, values):
|
||||
def selectall(self, query, values = None):
|
||||
# Query the database for the specified index
|
||||
cursor = self.db.cursor()
|
||||
cursor.execute(query, values)
|
||||
|
||||
if values is None:
|
||||
cursor.execute(query)
|
||||
else:
|
||||
cursor.execute(query, values)
|
||||
|
||||
result = cursor.fetchall()
|
||||
if result:
|
||||
return result
|
||||
|
||||
@@ -253,8 +253,6 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
|
||||
print(f"Failed to get game data for date: {game_date}, number: {game_number}, park: {park_id}")
|
||||
return None
|
||||
|
||||
print(curr_game)
|
||||
|
||||
select_teams = """
|
||||
SELECT
|
||||
win, team, home,
|
||||
@@ -400,6 +398,4 @@ def pull_training_data(database: Database, game_date: str, game_number: int, par
|
||||
]
|
||||
training_data = [*training_data, *prev_game_data]
|
||||
|
||||
print(f"{training_result}\n{training_data}")
|
||||
|
||||
return (training_result, training_data)
|
||||
|
||||
|
Can't render this file because it is too large.
|
|
Can't render this file because it is too large.
|
|
Can't render this file because it is too large.
|
2430
data/stats/gl2024.csv
Normal file
2430
data/stats/gl2024.csv
Normal file
File diff suppressed because it is too large
Load Diff
@@ -11,33 +11,17 @@ class Importer:
|
||||
def __init__(self, database: Database):
|
||||
self.database = database
|
||||
|
||||
def parse_all_data(self, source_dir, dest_dir):
|
||||
# Ensure the destination directory exists
|
||||
if not os.path.exists(dest_dir):
|
||||
os.makedirs(dest_dir)
|
||||
|
||||
def parse_all_data(self, source_dir):
|
||||
# List all files in the source and destination directories
|
||||
source_files = set(os.listdir(source_dir))
|
||||
for filename in os.listdir(dest_dir):
|
||||
if filename.endswith('.csv'):
|
||||
source_files.discard(filename)
|
||||
|
||||
dest_files = set(os.listdir(dest_dir))
|
||||
|
||||
# Find files that are in the source but not in the destination
|
||||
missing_files = source_files - dest_files
|
||||
|
||||
# Copy any missing CSV files from the source directory to the destination directory
|
||||
for filename in missing_files:
|
||||
for filename in source_files:
|
||||
src_file = os.path.join(source_dir, filename)
|
||||
dest_file = os.path.join(dest_dir, filename)
|
||||
|
||||
if self.parse_one_file(f"{source_dir}/{filename}"):
|
||||
try:
|
||||
shutil.copy(src_file, dest_file)
|
||||
print(f"Copied {filename} to {dest_dir}")
|
||||
except Exception as e:
|
||||
print(f"Failed to copy {filename}: {e}")
|
||||
if not self.parse_one_file(f"{source_dir}/{filename}"):
|
||||
print(f"Failed to parse {source_dir}/{filename}")
|
||||
|
||||
def parse_one_file(self, filepath):
|
||||
bb_dict = {}
|
||||
@@ -61,6 +45,7 @@ class Importer:
|
||||
time.sleep(60*60*24)
|
||||
|
||||
if not self.populate_database_with_stats(game):
|
||||
print(f"Failed to parse and populate {game}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
12
main.py
12
main.py
@@ -17,12 +17,14 @@ inputs = np.array([[0, 0, 1, 0],
|
||||
outputs = np.array([[0], [0], [0], [1], [1], [1]])
|
||||
|
||||
if __name__ == '__main__':
|
||||
#db_file = "./database/baseball.db"
|
||||
#db_conn = Database(db_file)
|
||||
db_file = "./database/baseball.db"
|
||||
db_conn = Database(db_file)
|
||||
|
||||
#pull_training_data(db_conn, "20240602", 0, "BAL12")
|
||||
query = "SELECT game_date, game_number, park_id FROM games ORDER BY game_date"
|
||||
all_games = db_conn.selectall(query)
|
||||
|
||||
print(get_sun_and_moon_phase(39.283889, -76.621667, "20240602"))
|
||||
for game in all_games:
|
||||
game_result, training_data = pull_training_data(db_conn, str(game[0]), game[1], game[2])
|
||||
|
||||
"""
|
||||
build_db_path = "./data/sql/build_db.sql"
|
||||
@@ -34,7 +36,7 @@ if __name__ == '__main__':
|
||||
db_conn.run_sql_file(fill_teams_path)
|
||||
|
||||
imp = Importer(db_conn)
|
||||
imp.parse_all_data("./data/stats/to_import", "./data/stats/imported/")
|
||||
imp.parse_all_data("./data/stats/", "./data/stats/imported/")
|
||||
"""
|
||||
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user