Merge pull request 'Starting to add the function to pull out data' (#14) from 5-get-data into main

Reviewed-on: #14
This commit was merged in pull request #14.
This commit is contained in:
2025-09-18 19:11:13 +00:00
13 changed files with 2949 additions and 107 deletions

1
.gitignore vendored
View File

@@ -2,5 +2,4 @@
.Trash-1000
target
database
data/stats/imported
data/__pycache__

View File

@@ -8,6 +8,7 @@ ephem = "*"
matplotlib = "*"
numpy = "*"
requests = "*"
suntime = "*"
timezonefinder = "*"
[dev-packages]

132
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "0763149251463ba577d24ee5a2399615cd74ddb51f0d7d75bbd777d9b499164f"
"sha256": "7ad75fcb917b3a288fd1156f75a1f0ece49414a473d45695e594becbd9eb1057"
},
"pipfile-spec": 6,
"requires": {
@@ -383,67 +383,67 @@
},
"fonttools": {
"hashes": [
"sha256:01158376b8a418a0bae9625c476cebfcfcb5e6761e9d243b219cd58341e7afbb",
"sha256:02e4fdf27c550dded10fe038a5981c29f81cb9bc649ff2eaa48e80dab8998f97",
"sha256:075f745d539a998cd92cb84c339a82e53e49114ec62aaea8307c80d3ad3aef3a",
"sha256:0b9e4fa7eaf046ed6ac470f6033d52c052481ff7a6e0a92373d14f556f298dc0",
"sha256:1017413cdc8555dce7ee23720da490282ab7ec1cf022af90a241f33f9a49afc4",
"sha256:1ab4c1fb45f2984b8b4a3face7cff0f67f9766e9414cbb6fd061e9d77819de98",
"sha256:2a2d0d33307f6ad3a2086a95dd607c202ea8852fa9fb52af9b48811154d1428a",
"sha256:2aeb829ad9d41a2ef17cab8bb5d186049ba38a840f10352e654aa9062ec32dc1",
"sha256:2beb5bfc4887a3130f8625349605a3a45fe345655ce6031d1bac11017454b943",
"sha256:39dfd42cc2dc647b2c5469bc7a5b234d9a49e72565b96dd14ae6f11c2c59ef15",
"sha256:412a5fd6345872a7c249dac5bcce380393f40c1c316ac07f447bc17d51900922",
"sha256:419f16d750d78e6d704bfe97b48bba2f73b15c9418f817d0cb8a9ca87a5b94bf",
"sha256:42052b56d176f8b315fbc09259439c013c0cb2109df72447148aeda677599612",
"sha256:43ab814bbba5f02a93a152ee61a04182bb5809bd2bc3609f7822e12c53ae2c91",
"sha256:43d177cd0e847ea026fedd9f099dc917da136ed8792d142298a252836390c478",
"sha256:4909cce2e35706f3d18c54d3dcce0414ba5e0fb436a454dffec459c61653b513",
"sha256:4f04c3ffbfa0baafcbc550657cf83657034eb63304d27b05cff1653b448ccff6",
"sha256:5265bc52ed447187d39891b5f21d7217722735d0de9fe81326566570d12851a9",
"sha256:57a3708ca6bfccb790f585fa6d8f29432ec329618a09ff94c16bcb3c55994643",
"sha256:58a8844f96cff35860647a65345bfca87f47a2494bfb4bef754e58c082511443",
"sha256:5b9b4c35b3be45e5bc774d3fc9608bbf4f9a8d371103b858c80edbeed31dd5aa",
"sha256:5c6d8d773470a5107052874341ed3c487c16ecd179976d81afed89dea5cd7406",
"sha256:5d29ab70658d2ec19422b25e6ace00a0b0ae4181ee31e03335eaef53907d2d83",
"sha256:5f3f021cea6e36410874763f4a517a5e2d6ac36ca8f95521f3a9fdaad0fe73dc",
"sha256:6065fdec8ff44c32a483fd44abe5bcdb40dd5e2571a5034b555348f2b3a52cea",
"sha256:647db657073672a8330608970a984d51573557f328030566521bc03415535042",
"sha256:652159e8214eb4856e8387ebcd6b6bd336ee258cbeb639c8be52005b122b9609",
"sha256:729367c91eb1ee84e61a733acc485065a00590618ca31c438e7dd4d600c01486",
"sha256:74995b402ad09822a4c8002438e54940d9f1ecda898d2bb057729d7da983e4cb",
"sha256:8156b11c0d5405810d216f53907bd0f8b982aa5f1e7e3127ab3be1a4062154ff",
"sha256:8387876a8011caec52d327d5e5bca705d9399ec4b17afb8b431ec50d47c17d23",
"sha256:89d9957b54246c6251345297dddf77a84d2c19df96af30d2de24093bbdf0528b",
"sha256:8c8758a7d97848fc8b514b3d9b4cb95243714b2f838dde5e1e3c007375de6214",
"sha256:8ee39da0227950f88626c91e219659e6cd725ede826b1c13edd85fc4cec9bbe6",
"sha256:8f8ef66ac6db450193ed150e10b3b45dde7aded10c5d279968bc63368027f62b",
"sha256:94f9721a564978a10d5c12927f99170d18e9a32e5a727c61eae56f956a4d118b",
"sha256:a960b09ff50c2e87864e83f352e5a90bcf1ad5233df579b1124660e1643de272",
"sha256:ac216a2980a2d2b3b88c68a24f8a9bfb203e2490e991b3238502ad8f1e7bfed0",
"sha256:b11bc177a0d428b37890825d7d025040d591aa833f85f8d8878ed183354f47df",
"sha256:bcd52eaa5c4c593ae9f447c1d13e7e4a00ca21d755645efa660b6999425b3c88",
"sha256:bf5fb864f80061a40c1747e0dbc4f6e738de58dd6675b07eb80bd06a93b063c4",
"sha256:c29ea087843e27a7cffc78406d32a5abf166d92afde7890394e9e079c9b4dbe9",
"sha256:c2b0597522d4c5bb18aa5cf258746a2d4a90f25878cbe865e4d35526abd1b9fc",
"sha256:c536f8a852e8d3fa71dde1ec03892aee50be59f7154b533f0bf3c1174cfd5126",
"sha256:c735e385e30278c54f43a0d056736942023c9043f84ee1021eff9fd616d17693",
"sha256:c866eef7a0ba320486ade6c32bfc12813d1a5db8567e6904fb56d3d40acc5116",
"sha256:cf7c5089d37787387123f1cb8f1793a47c5e1e3d1e4e7bfbc1cc96e0f925eabe",
"sha256:d31dc137ed8ec71dbc446949eba9035926e6e967b90378805dcf667ff57cabb1",
"sha256:d5c3bfdc9663f3d4b565f9cb3b8c1efb3e178186435b45105bde7328cfddd7fe",
"sha256:d601b153e51a5a6221f0d4ec077b6bfc6ac35bfe6c19aeaa233d8990b2b71726",
"sha256:e1ca10da138c300f768bb68e40e5b20b6ecfbd95f91aac4cc15010b6b9d65455",
"sha256:e3680884189e2b7c3549f6d304376e64711fd15118e4b1ae81940cb6b1eaa267",
"sha256:e54437651e1440ee53a95e6ceb6ee440b67a3d348c76f45f4f48de1a5ecab019",
"sha256:e90a89e52deb56b928e761bb5b5f65f13f669bfd96ed5962975debea09776a23",
"sha256:e9ad4ce044e3236f0814c906ccce8647046cc557539661e35211faadf76f283b",
"sha256:ea03f1da0d722fe3c2278a05957e6550175571a4894fbf9d178ceef4a3783d2b",
"sha256:efbec204fa9f877641747f2d9612b2b656071390d7a7ef07a9dbf0ecf9c7195c",
"sha256:fb13823a74b3a9204a8ed76d3d6d5ec12e64cc5bc44914eb9ff1cdac04facd43"
"sha256:036cd87a2dbd7ef72f7b68df8314ced00b8d9973aee296f2464d06a836aeb9a9",
"sha256:0476ea74161322e08c7a982f83558a2b81b491509984523a1a540baf8611cc31",
"sha256:0ec99f9bdfee9cdb4a9172f9e8fd578cce5feb231f598909e0aecf5418da4f25",
"sha256:12dc4670e6e6cc4553e8de190f86a549e08ca83a036363115d94a2d67488831e",
"sha256:14870930181493b1d740b6f25483e20185e5aea58aec7d266d16da7be822b4bb",
"sha256:1603b85d5922042563eea518e272b037baf273b9a57d0f190852b0b075079000",
"sha256:1647201af10993090120da2e66e9526c4e20e88859f3e34aa05b8c24ded2a564",
"sha256:1a1bfe5378962825dabe741720885e8b9ae9745ec7ecc4a5ec1f1ce59a6062bf",
"sha256:2543b81641ea5b8ddfcae7926e62aafd5abc604320b1b119e5218c014a7a5d3c",
"sha256:2a159e36ae530650acd13604f364b3a2477eff7408dcac6a640d74a3744d2514",
"sha256:381bde13216ba09489864467f6bc0c57997bd729abfbb1ce6f807ba42c06cceb",
"sha256:39ad9612c6a622726a6a130e8ab15794558591f999673f1ee7d2f3d30f6a3e1c",
"sha256:3cdf9d32690f0e235342055f0a6108eedfccf67b213b033bac747eb809809513",
"sha256:464d15b58a9fd4304c728735fc1d42cd812fd9ebc27c45b18e78418efd337c28",
"sha256:47742c33fe65f41eabed36eec2d7313a8082704b7b808752406452f766c573fc",
"sha256:4d974312a9f405628e64f475b1f5015a61fd338f0a1b61d15c4822f97d6b045b",
"sha256:511946e8d7ea5c0d6c7a53c4cb3ee48eda9ab9797cd9bf5d95829a398400354f",
"sha256:53c1a411b7690042535a4f0edf2120096a39a506adeb6c51484a232e59f2aa0c",
"sha256:5729e12a982dba3eeae650de48b06f3b9ddb51e9aee2fcaf195b7d09a96250e2",
"sha256:594a6fd2f8296583ac7babc4880c8deee7c4f05ab0141addc6bce8b8e367e996",
"sha256:59d85088e29fa7a8f87d19e97a1beae2a35821ee48d8ef6d2c4f965f26cb9f8a",
"sha256:6235fc06bcbdb40186f483ba9d5d68f888ea68aa3c8dac347e05a7c54346fbc8",
"sha256:67f9640d6b31d66c0bc54bdbe8ed50983c755521c101576a25e377a8711e8207",
"sha256:6dee142b8b3096514c96ad9e2106bf039e2fe34a704c587585b569a36df08c3c",
"sha256:738f31f23e0339785fd67652a94bc69ea49e413dfdb14dcb8c8ff383d249464e",
"sha256:7ad5d8d8cc9e43cb438b3eb4a0094dd6d4088daa767b0a24d52529361fd4c199",
"sha256:7bb32e0e33795e3b7795bb9b88cb6a9d980d3cbe26dd57642471be547708e17a",
"sha256:7ff58ea1eb8fc7e05e9a949419f031890023f8785c925b44d6da17a6a7d6e85d",
"sha256:82906d002c349cad647a7634b004825a7335f8159d0d035ae89253b4abf6f3ea",
"sha256:83ad6e5d06ef3a2884c4fa6384a20d6367b5cfe560e3b53b07c9dc65a7020e73",
"sha256:8991bdbae39cf78bcc9cd3d81f6528df1f83f2e7c23ccf6f990fa1f0b6e19708",
"sha256:8bd0f759020e87bb5d323e6283914d9bf4ae35a7307dafb2cbd1e379e720ad37",
"sha256:8bd733e47bf4c6dee2b2d8af7a1f7b0c091909b22dbb969a29b2b991e61e5ba4",
"sha256:8e5e2682cf7be766d84f462ba8828d01e00c8751a8e8e7ce12d7784ccb69a30d",
"sha256:92ac2d45794f95d1ad4cb43fa07e7e3776d86c83dc4b9918cf82831518165b4b",
"sha256:95807a3b5e78f2714acaa26a33bc2143005cc05c0217b322361a772e59f32b89",
"sha256:95922a922daa1f77cc72611747c156cfb38030ead72436a2c551d30ecef519b9",
"sha256:980fd7388e461b19a881d35013fec32c713ffea1fc37aef2f77d11f332dfd7da",
"sha256:9836394e2f4ce5f9c0a7690ee93bd90aa1adc6b054f1a57b562c5d242c903104",
"sha256:9cde8b6a6b05f68516573523f2013a3574cb2c75299d7d500f44de82ba947b80",
"sha256:a039c38d5644c691eb53cd65360921338f54e44c90b4e764605711e046c926ee",
"sha256:a10c1bd7644dc58f8862d8ba0cf9fb7fef0af01ea184ba6ce3f50ab7dfe74d5a",
"sha256:a72155928d7053bbde499d32a9c77d3f0f3d29ae72b5a121752481bcbd71e50f",
"sha256:a8d40594982ed858780e18a7e4c80415af65af0f22efa7de26bdd30bf24e1e14",
"sha256:af6dbd463a3530256abf21f675ddf87646272bc48901803a185c49d06287fbf1",
"sha256:b3ebda00c3bb8f32a740b72ec38537d54c7c09f383a4cfefb0b315860f825b08",
"sha256:c52694eae5d652361d59ecdb5a2246bff7cff13b6367a12da8499e9df56d148d",
"sha256:cdcdf7aad4bab7fd0f2938624a5a84eb4893be269f43a6701b0720b726f24df0",
"sha256:d029804c70fddf90be46ed5305c136cae15800a2300cb0f6bba96d48e770dde0",
"sha256:d09e487d6bfbe21195801323ba95c91cb3523f0fcc34016454d4d9ae9eaa57fe",
"sha256:dec2f22486d7781087b173799567cffdcc75e9fb2f1c045f05f8317ccce76a3e",
"sha256:e4f5100e66ec307cce8b52fc03e379b5d1596e9cb8d8b19dfeeccc1e68d86c96",
"sha256:e72c0749b06113f50bcb80332364c6be83a9582d6e3db3fe0b280f996dc2ef22",
"sha256:e937790f3c2c18a1cbc7da101550a84319eb48023a715914477d2e7faeaba570",
"sha256:f1f1bbc23ba1312bd8959896f46f667753b90216852d2a8cfa2d07e0cb234144",
"sha256:f33839aa091f7eef4e9078f5b7ab1b8ea4b1d8a50aeaef9fdb3611bba80869ec",
"sha256:fa9ecaf2dcef8941fb5719e16322345d730f4c40599bbf47c9753de40eb03882",
"sha256:fc21c4a05226fd39715f66c1c28214862474db50df9f08fd1aa2f96698887bc3"
],
"markers": "python_version >= '3.9'",
"version": "==4.59.1"
"version": "==4.59.2"
},
"h3": {
"hashes": [
@@ -898,7 +898,7 @@
"sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3",
"sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.9.0.post0"
},
"requests": {
@@ -915,9 +915,17 @@
"sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274",
"sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.17.0"
},
"suntime": {
"hashes": [
"sha256:33ac6ec2a3e14758cc690f7573f689d19c3131a6c9753f1bb54460bd70372ca4",
"sha256:4834f7907ad13dbb369904cb5f4376edc0b06c6e8a1cfc0aac1268f64d0ecdcf"
],
"index": "pypi",
"version": "==1.3.2"
},
"timezonefinder": {
"hashes": [
"sha256:00a1b2e681c182ed492280ab0da6343b5a064ad6a1f8ad5a69f94465ccb74ec6",

View File

@@ -1,21 +1,20 @@
import ephem
import requests
import datetime
import requests
from suntime import Sun, SunTimeException
from datetime import datetime
from dateutil import tz
from timezonefinder import TimezoneFinder
def get_sun_and_moon_phase(lat, long, date_str):
curr_date = datetime.strptime(date_str, "%Y%m%d")
date_str = f"{date_str[:4]}/{date_str[4:6]}/{date_str[6:8]}"
observer = ephem.Observer()
observer.lat = str(lat)
observer.lon = str(long)
observer.date = date_str
sun = ephem.Sun()
sun.compute(observer)
sunrise_time = int(observer.next_rising(sun).datetime().strftime('%Y%m%d'))
sunset_time = int(observer.next_setting(sun).datetime().strftime('%Y%m%d'))
my_tz = get_timezone(lat, long)
sun = Sun(lat, long)
sunrise_time = sun.get_sunrise_time(curr_date, tz.gettz(my_tz)).strftime("%H:%M:%S")
sunset_time = sun.get_sunset_time(curr_date, tz.gettz(my_tz)).strftime("%H:%M:%S")
date = ephem.Date(date_str)
moon = ephem.Moon()

View File

@@ -23,10 +23,15 @@ class Database:
else:
return None
def selectall(self, query, values):
def selectall(self, query, values = None):
# Query the database for the specified index
cursor = self.db.cursor()
cursor.execute(query, values)
if values is None:
cursor.execute(query)
else:
cursor.execute(query, values)
result = cursor.fetchall()
if result:
return result

View File

@@ -0,0 +1,401 @@
from data.db_connect import Database
# Game date in YYYYmmDD
def pull_training_data(database: "Database", game_date: str, game_number: int, park_id: str):
    """Build one training sample (label + feature list) for a single game.

    Args:
        database: Connection wrapper exposing ``select`` (one row or ``None``)
            and ``selectall`` (rows, or ``None`` when nothing matched).
        game_date: Game date as a ``YYYYmmDD`` string; its first four
            characters are used as the season year.
        game_number: Game number stored in ``games.game_number``.
        park_id: Identifier of the park the game was played in.

    Returns:
        ``None`` when the game cannot be found or does not have exactly two
        ``team_game`` rows. Otherwise ``(training_result, training_data)``:

        * ``training_result`` is ``True`` when the home team won and
          ``False`` when the visiting team won.
        * ``training_data`` is a flat list made of, in order:

          1. Game, park and weather columns of the requested game
             (everything selected below except ``games.id``): game_date,
             game_number, day_of_week, day_night, the six umpire ids,
             park_id, the six fence distance/height values, has_roof,
             elevation, latitude, longitude, and the fifteen weather
             columns (temperature ... moon_phase).
          2. The first ``team_game`` row without its ``win`` flag: team,
             home, game_num, manager_id, starting_pitcher_id and the nine
             starting player id/position pairs.
          3. A summary of that team's earlier games this season:
             win_streak, loss_streak, then every column of its latest
             earlier game except ``games.id`` (game info, umpires, score,
             line_score, win, batting/pitching/fielding totals, manager,
             starting pitcher, lineup, park_id and weather).
          4. and 5. The same two groups for the second ``team_game`` row.

        A team with no earlier game this season gets an all-zero summary of
        the same width as a real one, so every sample has the same length.
    """
    select_upcoming_game = """
        SELECT
            games.id,
            games.game_date, games.game_number,
            games.day_of_week, games.day_night,
            games.home_plate_ump_id,
            games.b1_ump_id, games.b2_ump_id, games.b3_ump_id,
            games.lf_ump_id, games.rf_ump_id,
            parks.park_id,
            parks.lf_fence_distance, parks.lf_fence_height,
            parks.ct_fence_distance, parks.ct_fence_height,
            parks.rf_fence_distance, parks.rf_fence_height,
            parks.has_roof, parks.elevation,
            parks.latitude, parks.longitude,
            weather.temperature, weather.humidity, weather.dew_point,
            weather.apparent_temperature, weather.air_pressure,
            weather.precipitation, weather.rain, weather.snowfall,
            weather.cloud_cover,
            weather.wind_speed, weather.wind_direction, weather.wind_gusts,
            weather.sun_rise, weather.sun_set, weather.moon_phase
        FROM
            games
            LEFT JOIN parks ON parks.park_id = games.park_id
            LEFT JOIN weather ON weather.game_id = games.id
        WHERE
            games.game_date = ? AND games.game_number = ? AND games.park_id = ?
    """

    curr_game = database.select(select_upcoming_game, [game_date, game_number, park_id])
    if curr_game is None:
        print(f"Failed to get game data for date: {game_date}, number: {game_number}, park: {park_id}")
        return None

    select_teams = """
        SELECT
            win, team, home,
            game_num, manager_id, starting_pitcher_id,
            starting_1_id, starting_1_position,
            starting_2_id, starting_2_position,
            starting_3_id, starting_3_position,
            starting_4_id, starting_4_position,
            starting_5_id, starting_5_position,
            starting_6_id, starting_6_position,
            starting_7_id, starting_7_position,
            starting_8_id, starting_8_position,
            starting_9_id, starting_9_position
        FROM
            team_game
        WHERE
            game = ?
    """

    curr_team_game = database.selectall(select_teams, [curr_game[0]])
    # selectall may hand back None instead of an empty list, so guard before len().
    team_row_count = 0 if curr_team_game is None else len(curr_team_game)
    if team_row_count != 2:
        print(f"Got the wrong number of games {team_row_count}")
        return None

    # Columns 0 and 2 of a team row are `win` and `home`. The row order is not
    # fixed by the query, so look for the home row instead of assuming it is first.
    training_result = any(row[0] == 1 and row[2] == 1 for row in curr_team_game)

    select_previous_games = """
        SELECT
            games.id,
            games.game_date, games.game_number,
            games.day_of_week, games.day_night,
            games.length_in_outs, games.completion_info,
            games.forfeit, games.protest,
            games.attendence, games.length_in_minutes,
            games.home_plate_ump_id,
            games.b1_ump_id, games.b2_ump_id, games.b3_ump_id,
            games.lf_ump_id, games.rf_ump_id,
            team_game.game_num, team_game.score,
            team_game.line_score, team_game.win,
            team_game.hits, team_game.doubles, team_game.triples,
            team_game.homeruns, team_game.rbis,
            team_game.sacrifice_hits, team_game.sacrifice_flies,
            team_game.hit_by_pitch, team_game.walks, team_game.intentional_walks,
            team_game.strikeouts, team_game.stolen_bases,
            team_game.caught_stealing, team_game.grounded_double,
            team_game.interference, team_game.left_on_base,
            team_game.pitchers_used,
            team_game.individual_earned_runs, team_game.earned_runs,
            team_game.wild_pitches, team_game.balks,
            team_game.putouts, team_game.assists,
            team_game.errors, team_game.passed,
            team_game.double_play, team_game.triple_play,
            team_game.manager_id, team_game.starting_pitcher_id,
            team_game.starting_1_id, team_game.starting_1_position,
            team_game.starting_2_id, team_game.starting_2_position,
            team_game.starting_3_id, team_game.starting_3_position,
            team_game.starting_4_id, team_game.starting_4_position,
            team_game.starting_5_id, team_game.starting_5_position,
            team_game.starting_6_id, team_game.starting_6_position,
            team_game.starting_7_id, team_game.starting_7_position,
            team_game.starting_8_id, team_game.starting_8_position,
            team_game.starting_9_id, team_game.starting_9_position,
            parks.park_id,
            weather.temperature, weather.humidity, weather.dew_point,
            weather.apparent_temperature, weather.air_pressure,
            weather.precipitation, weather.rain, weather.snowfall,
            weather.cloud_cover,
            weather.wind_speed, weather.wind_direction, weather.wind_gusts,
            weather.sun_rise, weather.sun_set, weather.moon_phase
        FROM
            games
            LEFT JOIN parks ON parks.park_id = games.park_id
            LEFT JOIN team_game ON team_game.game = games.id
            LEFT JOIN weather ON weather.game_id = games.id
        WHERE
            games.game_date > ? AND
            games.game_date < ? AND
            team_game.team = ?
        ORDER BY team_game.game_num ASC
    """

    # Only games of the same season that were played before this one count.
    first_of_the_year = f"{game_date[:4]}0101"

    # Drop games.id; the remaining columns describe the game itself.
    training_data = list(curr_game[1:])
    for team_row in curr_team_game:
        # Drop `win` (column 0): it is the label, not an input feature.
        training_data.extend(team_row[1:])
        prev_games = database.selectall(
            select_previous_games,
            [first_of_the_year, game_date, team_row[1]],
        )
        training_data.extend(_summarize_previous_games(prev_games))

    return (training_result, training_data)


# Position of team_game.win within a row of the previous-games query.
_PREV_GAME_WIN_INDEX = 20

# Width of one previous-games summary: win streak + loss streak + the 83
# columns that follow games.id in the previous-games query (84 selected).
_PREV_GAME_FEATURE_COUNT = 85


def _trailing_streak(prev_games, outcome):
    """Count the consecutive latest games whose win flag equals ``outcome``."""
    streak = 0
    # Walk from the latest game back to the very first one (index 0 included).
    for row in reversed(prev_games):
        if row[_PREV_GAME_WIN_INDEX] != outcome:
            break
        streak += 1
    return streak


def _summarize_previous_games(prev_games):
    """Return [win_streak, loss_streak, *latest game columns] or zero padding."""
    if not prev_games:
        # No earlier game this season (None or empty): keep the vector width stable.
        return [0] * _PREV_GAME_FEATURE_COUNT
    return [
        _trailing_streak(prev_games, 1),
        _trailing_streak(prev_games, 0),
        # Rows are ordered by game_num ascending, so the last row is the latest game.
        *prev_games[-1][1:],
    ]

View File

Can't render this file because it is too large.

View File

Can't render this file because it is too large.

View File

Can't render this file because it is too large.

View File

@@ -2,7 +2,7 @@ CREATE TABLE IF NOT EXISTS parks (
park_id CHAR(5) NOT NULL,
park_name VARCHAR,
lf_fence_distance SMALLINT UNSIGNED,
lf_fence_height, SMALLINT UNSIGNED,
lf_fence_height SMALLINT UNSIGNED,
ct_fence_distance SMALLINT UNSIGNED,
ct_fence_height SMALLINT UNSIGNED,
rf_fence_distance SMALLINT UNSIGNED,
@@ -59,6 +59,8 @@ CREATE TABLE IF NOT EXISTS team_game (
game_num TINYINT UNSIGNED,
score TINYINT UNSIGNED,
line_score CHAR(9),
win BIT,
home BIT,
at_bats TINYINT UNSIGNED,
hits TINYINT UNSIGNED,

2430
data/stats/gl2024.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -11,33 +11,17 @@ class Importer:
def __init__(self, database: Database):
self.database = database
def parse_all_data(self, source_dir, dest_dir):
# Ensure the destination directory exists
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)
def parse_all_data(self, source_dir):
# List all files in the source and destination directories
source_files = set(os.listdir(source_dir))
for filename in os.listdir(dest_dir):
if filename.endswith('.csv'):
source_files.discard(filename)
dest_files = set(os.listdir(dest_dir))
# Find files that are in the source but not in the destination
missing_files = source_files - dest_files
# Copy any missing CSV files from the source directory to the destination directory
for filename in missing_files:
for filename in source_files:
src_file = os.path.join(source_dir, filename)
dest_file = os.path.join(dest_dir, filename)
if self.parse_one_file(f"{source_dir}/{filename}"):
try:
shutil.copy(src_file, dest_file)
print(f"Copied {filename} to {dest_dir}")
except Exception as e:
print(f"Failed to copy {filename}: {e}")
if not self.parse_one_file(f"{source_dir}/{filename}"):
print(f"Failed to parse {source_dir}/{filename}")
def parse_one_file(self, filepath):
bb_dict = {}
@@ -61,6 +45,7 @@ class Importer:
time.sleep(60*60*24)
if not self.populate_database_with_stats(game):
print(f"Failed to parse and populate {game}")
return False
return True
@@ -107,7 +92,7 @@ class Importer:
INSERT INTO team_game
(
game, team, game_num,
score, line_score, at_bats,
score, line_score, win, home, at_bats,
hits, doubles, triples,
homeruns, rbis, sacrifice_hits,
sacrifice_flies, hit_by_pitch, walks,
@@ -132,7 +117,7 @@ class Importer:
VALUES
(
?, ?, ?,
?, ?, ?,
?, ?, ?, ?, ?,
?, ?, ?,
?, ?, ?,
?, ?, ?,
@@ -205,9 +190,10 @@ class Importer:
game_id = self.database.insert(insert_game, game_data)
visiting_win_loss = int(game_stats["visiting-score"]) > int(game_stats["home-score"])
visiting_team_data = [
game_id, game_stats["visiting-team"], game_stats["visiting-game-num"],
game_stats["visiting-score"], game_stats["visiting-line-scores"], game_stats["visiting-at-bats"],
game_stats["visiting-score"], game_stats["visiting-line-scores"], visiting_win_loss, 0, game_stats["visiting-at-bats"],
game_stats["visiting-hits"], game_stats["visiting-doubles"], game_stats["visiting-triples"],
game_stats["visiting-homeruns"], game_stats["visiting-rbi"], game_stats["visiting-sacrifice-hits"],
game_stats["visiting-sacrifice-flies"], game_stats["visiting-hit-by-pitch"], game_stats["visiting-walks"],
@@ -230,9 +216,10 @@ class Importer:
game_stats["visiting-starting-9-position"]
]
home_win_loss = int(game_stats["home-score"]) > int(game_stats["visiting-score"])
home_team_data = [
game_id, game_stats["home-team"], game_stats["home-game-num"],
game_stats["home-score"], game_stats["home-line-scores"], game_stats["home-at-bats"],
game_stats["home-score"], game_stats["home-line-scores"], home_win_loss, 1, game_stats["home-at-bats"],
game_stats["home-hits"], game_stats["home-doubles"], game_stats["home-triples"],
game_stats["home-homeruns"], game_stats["home-rbi"], game_stats["home-sacrifice-hits"],
game_stats["home-sacrifice-flies"], game_stats["home-hit-by-pitch"], game_stats["home-walks"],

20
main.py
View File

@@ -1,8 +1,10 @@
import numpy as np # helps with the math
import matplotlib.pyplot as plt # to plot error during training
from data.get_data import pull_training_data
from data.db_connect import Database
from data.stats_importer import Importer
from data.build_weather import get_weather, get_sun_and_moon_phase
from data.stats_importer import Importer
# input data
inputs = np.array([[0, 0, 1, 0],
@@ -15,19 +17,27 @@ inputs = np.array([[0, 0, 1, 0],
outputs = np.array([[0], [0], [0], [1], [1], [1]])
if __name__ == '__main__':
db_file = "./database/baseball.db"
db_conn = Database(db_file)
query = "SELECT game_date, game_number, park_id FROM games ORDER BY game_date"
all_games = db_conn.selectall(query)
for game in all_games:
game_result, training_data = pull_training_data(db_conn, str(game[0]), game[1], game[2])
"""
build_db_path = "./data/sql/build_db.sql"
fill_parks_path = "./data/sql/prefill_parks.sql"
fill_teams_path = "./data/sql/prefill_teams.sql"
db_file = "./database/baseball.db"
db_conn = Database(db_file)
db_conn.run_sql_file(build_db_path)
db_conn.run_sql_file(fill_parks_path)
db_conn.run_sql_file(fill_teams_path)
imp = Importer(db_conn)
imp.parse_all_data("./data/stats/to_import", "./data/stats/imported/")
imp.parse_all_data("./data/stats/", "./data/stats/imported/")
"""
"""
else: