Merge pull request 'Starting to add the function to pull out data' (#14) from 5-get-data into main

Reviewed-on: #14
This commit was merged in pull request #14.
This commit is contained in:
2025-09-18 19:11:13 +00:00
13 changed files with 2949 additions and 107 deletions

3
.gitignore vendored
View File

@@ -2,5 +2,4 @@
.Trash-1000 .Trash-1000
target target
database database
data/stats/imported data/__pycache__
data/__pycache__

View File

@@ -8,6 +8,7 @@ ephem = "*"
matplotlib = "*" matplotlib = "*"
numpy = "*" numpy = "*"
requests = "*" requests = "*"
suntime = "*"
timezonefinder = "*" timezonefinder = "*"
[dev-packages] [dev-packages]

132
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "0763149251463ba577d24ee5a2399615cd74ddb51f0d7d75bbd777d9b499164f" "sha256": "7ad75fcb917b3a288fd1156f75a1f0ece49414a473d45695e594becbd9eb1057"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@@ -383,67 +383,67 @@
}, },
"fonttools": { "fonttools": {
"hashes": [ "hashes": [
"sha256:01158376b8a418a0bae9625c476cebfcfcb5e6761e9d243b219cd58341e7afbb", "sha256:036cd87a2dbd7ef72f7b68df8314ced00b8d9973aee296f2464d06a836aeb9a9",
"sha256:02e4fdf27c550dded10fe038a5981c29f81cb9bc649ff2eaa48e80dab8998f97", "sha256:0476ea74161322e08c7a982f83558a2b81b491509984523a1a540baf8611cc31",
"sha256:075f745d539a998cd92cb84c339a82e53e49114ec62aaea8307c80d3ad3aef3a", "sha256:0ec99f9bdfee9cdb4a9172f9e8fd578cce5feb231f598909e0aecf5418da4f25",
"sha256:0b9e4fa7eaf046ed6ac470f6033d52c052481ff7a6e0a92373d14f556f298dc0", "sha256:12dc4670e6e6cc4553e8de190f86a549e08ca83a036363115d94a2d67488831e",
"sha256:1017413cdc8555dce7ee23720da490282ab7ec1cf022af90a241f33f9a49afc4", "sha256:14870930181493b1d740b6f25483e20185e5aea58aec7d266d16da7be822b4bb",
"sha256:1ab4c1fb45f2984b8b4a3face7cff0f67f9766e9414cbb6fd061e9d77819de98", "sha256:1603b85d5922042563eea518e272b037baf273b9a57d0f190852b0b075079000",
"sha256:2a2d0d33307f6ad3a2086a95dd607c202ea8852fa9fb52af9b48811154d1428a", "sha256:1647201af10993090120da2e66e9526c4e20e88859f3e34aa05b8c24ded2a564",
"sha256:2aeb829ad9d41a2ef17cab8bb5d186049ba38a840f10352e654aa9062ec32dc1", "sha256:1a1bfe5378962825dabe741720885e8b9ae9745ec7ecc4a5ec1f1ce59a6062bf",
"sha256:2beb5bfc4887a3130f8625349605a3a45fe345655ce6031d1bac11017454b943", "sha256:2543b81641ea5b8ddfcae7926e62aafd5abc604320b1b119e5218c014a7a5d3c",
"sha256:39dfd42cc2dc647b2c5469bc7a5b234d9a49e72565b96dd14ae6f11c2c59ef15", "sha256:2a159e36ae530650acd13604f364b3a2477eff7408dcac6a640d74a3744d2514",
"sha256:412a5fd6345872a7c249dac5bcce380393f40c1c316ac07f447bc17d51900922", "sha256:381bde13216ba09489864467f6bc0c57997bd729abfbb1ce6f807ba42c06cceb",
"sha256:419f16d750d78e6d704bfe97b48bba2f73b15c9418f817d0cb8a9ca87a5b94bf", "sha256:39ad9612c6a622726a6a130e8ab15794558591f999673f1ee7d2f3d30f6a3e1c",
"sha256:42052b56d176f8b315fbc09259439c013c0cb2109df72447148aeda677599612", "sha256:3cdf9d32690f0e235342055f0a6108eedfccf67b213b033bac747eb809809513",
"sha256:43ab814bbba5f02a93a152ee61a04182bb5809bd2bc3609f7822e12c53ae2c91", "sha256:464d15b58a9fd4304c728735fc1d42cd812fd9ebc27c45b18e78418efd337c28",
"sha256:43d177cd0e847ea026fedd9f099dc917da136ed8792d142298a252836390c478", "sha256:47742c33fe65f41eabed36eec2d7313a8082704b7b808752406452f766c573fc",
"sha256:4909cce2e35706f3d18c54d3dcce0414ba5e0fb436a454dffec459c61653b513", "sha256:4d974312a9f405628e64f475b1f5015a61fd338f0a1b61d15c4822f97d6b045b",
"sha256:4f04c3ffbfa0baafcbc550657cf83657034eb63304d27b05cff1653b448ccff6", "sha256:511946e8d7ea5c0d6c7a53c4cb3ee48eda9ab9797cd9bf5d95829a398400354f",
"sha256:5265bc52ed447187d39891b5f21d7217722735d0de9fe81326566570d12851a9", "sha256:53c1a411b7690042535a4f0edf2120096a39a506adeb6c51484a232e59f2aa0c",
"sha256:57a3708ca6bfccb790f585fa6d8f29432ec329618a09ff94c16bcb3c55994643", "sha256:5729e12a982dba3eeae650de48b06f3b9ddb51e9aee2fcaf195b7d09a96250e2",
"sha256:58a8844f96cff35860647a65345bfca87f47a2494bfb4bef754e58c082511443", "sha256:594a6fd2f8296583ac7babc4880c8deee7c4f05ab0141addc6bce8b8e367e996",
"sha256:5b9b4c35b3be45e5bc774d3fc9608bbf4f9a8d371103b858c80edbeed31dd5aa", "sha256:59d85088e29fa7a8f87d19e97a1beae2a35821ee48d8ef6d2c4f965f26cb9f8a",
"sha256:5c6d8d773470a5107052874341ed3c487c16ecd179976d81afed89dea5cd7406", "sha256:6235fc06bcbdb40186f483ba9d5d68f888ea68aa3c8dac347e05a7c54346fbc8",
"sha256:5d29ab70658d2ec19422b25e6ace00a0b0ae4181ee31e03335eaef53907d2d83", "sha256:67f9640d6b31d66c0bc54bdbe8ed50983c755521c101576a25e377a8711e8207",
"sha256:5f3f021cea6e36410874763f4a517a5e2d6ac36ca8f95521f3a9fdaad0fe73dc", "sha256:6dee142b8b3096514c96ad9e2106bf039e2fe34a704c587585b569a36df08c3c",
"sha256:6065fdec8ff44c32a483fd44abe5bcdb40dd5e2571a5034b555348f2b3a52cea", "sha256:738f31f23e0339785fd67652a94bc69ea49e413dfdb14dcb8c8ff383d249464e",
"sha256:647db657073672a8330608970a984d51573557f328030566521bc03415535042", "sha256:7ad5d8d8cc9e43cb438b3eb4a0094dd6d4088daa767b0a24d52529361fd4c199",
"sha256:652159e8214eb4856e8387ebcd6b6bd336ee258cbeb639c8be52005b122b9609", "sha256:7bb32e0e33795e3b7795bb9b88cb6a9d980d3cbe26dd57642471be547708e17a",
"sha256:729367c91eb1ee84e61a733acc485065a00590618ca31c438e7dd4d600c01486", "sha256:7ff58ea1eb8fc7e05e9a949419f031890023f8785c925b44d6da17a6a7d6e85d",
"sha256:74995b402ad09822a4c8002438e54940d9f1ecda898d2bb057729d7da983e4cb", "sha256:82906d002c349cad647a7634b004825a7335f8159d0d035ae89253b4abf6f3ea",
"sha256:8156b11c0d5405810d216f53907bd0f8b982aa5f1e7e3127ab3be1a4062154ff", "sha256:83ad6e5d06ef3a2884c4fa6384a20d6367b5cfe560e3b53b07c9dc65a7020e73",
"sha256:8387876a8011caec52d327d5e5bca705d9399ec4b17afb8b431ec50d47c17d23", "sha256:8991bdbae39cf78bcc9cd3d81f6528df1f83f2e7c23ccf6f990fa1f0b6e19708",
"sha256:89d9957b54246c6251345297dddf77a84d2c19df96af30d2de24093bbdf0528b", "sha256:8bd0f759020e87bb5d323e6283914d9bf4ae35a7307dafb2cbd1e379e720ad37",
"sha256:8c8758a7d97848fc8b514b3d9b4cb95243714b2f838dde5e1e3c007375de6214", "sha256:8bd733e47bf4c6dee2b2d8af7a1f7b0c091909b22dbb969a29b2b991e61e5ba4",
"sha256:8ee39da0227950f88626c91e219659e6cd725ede826b1c13edd85fc4cec9bbe6", "sha256:8e5e2682cf7be766d84f462ba8828d01e00c8751a8e8e7ce12d7784ccb69a30d",
"sha256:8f8ef66ac6db450193ed150e10b3b45dde7aded10c5d279968bc63368027f62b", "sha256:92ac2d45794f95d1ad4cb43fa07e7e3776d86c83dc4b9918cf82831518165b4b",
"sha256:94f9721a564978a10d5c12927f99170d18e9a32e5a727c61eae56f956a4d118b", "sha256:95807a3b5e78f2714acaa26a33bc2143005cc05c0217b322361a772e59f32b89",
"sha256:a960b09ff50c2e87864e83f352e5a90bcf1ad5233df579b1124660e1643de272", "sha256:95922a922daa1f77cc72611747c156cfb38030ead72436a2c551d30ecef519b9",
"sha256:ac216a2980a2d2b3b88c68a24f8a9bfb203e2490e991b3238502ad8f1e7bfed0", "sha256:980fd7388e461b19a881d35013fec32c713ffea1fc37aef2f77d11f332dfd7da",
"sha256:b11bc177a0d428b37890825d7d025040d591aa833f85f8d8878ed183354f47df", "sha256:9836394e2f4ce5f9c0a7690ee93bd90aa1adc6b054f1a57b562c5d242c903104",
"sha256:bcd52eaa5c4c593ae9f447c1d13e7e4a00ca21d755645efa660b6999425b3c88", "sha256:9cde8b6a6b05f68516573523f2013a3574cb2c75299d7d500f44de82ba947b80",
"sha256:bf5fb864f80061a40c1747e0dbc4f6e738de58dd6675b07eb80bd06a93b063c4", "sha256:a039c38d5644c691eb53cd65360921338f54e44c90b4e764605711e046c926ee",
"sha256:c29ea087843e27a7cffc78406d32a5abf166d92afde7890394e9e079c9b4dbe9", "sha256:a10c1bd7644dc58f8862d8ba0cf9fb7fef0af01ea184ba6ce3f50ab7dfe74d5a",
"sha256:c2b0597522d4c5bb18aa5cf258746a2d4a90f25878cbe865e4d35526abd1b9fc", "sha256:a72155928d7053bbde499d32a9c77d3f0f3d29ae72b5a121752481bcbd71e50f",
"sha256:c536f8a852e8d3fa71dde1ec03892aee50be59f7154b533f0bf3c1174cfd5126", "sha256:a8d40594982ed858780e18a7e4c80415af65af0f22efa7de26bdd30bf24e1e14",
"sha256:c735e385e30278c54f43a0d056736942023c9043f84ee1021eff9fd616d17693", "sha256:af6dbd463a3530256abf21f675ddf87646272bc48901803a185c49d06287fbf1",
"sha256:c866eef7a0ba320486ade6c32bfc12813d1a5db8567e6904fb56d3d40acc5116", "sha256:b3ebda00c3bb8f32a740b72ec38537d54c7c09f383a4cfefb0b315860f825b08",
"sha256:cf7c5089d37787387123f1cb8f1793a47c5e1e3d1e4e7bfbc1cc96e0f925eabe", "sha256:c52694eae5d652361d59ecdb5a2246bff7cff13b6367a12da8499e9df56d148d",
"sha256:d31dc137ed8ec71dbc446949eba9035926e6e967b90378805dcf667ff57cabb1", "sha256:cdcdf7aad4bab7fd0f2938624a5a84eb4893be269f43a6701b0720b726f24df0",
"sha256:d5c3bfdc9663f3d4b565f9cb3b8c1efb3e178186435b45105bde7328cfddd7fe", "sha256:d029804c70fddf90be46ed5305c136cae15800a2300cb0f6bba96d48e770dde0",
"sha256:d601b153e51a5a6221f0d4ec077b6bfc6ac35bfe6c19aeaa233d8990b2b71726", "sha256:d09e487d6bfbe21195801323ba95c91cb3523f0fcc34016454d4d9ae9eaa57fe",
"sha256:e1ca10da138c300f768bb68e40e5b20b6ecfbd95f91aac4cc15010b6b9d65455", "sha256:dec2f22486d7781087b173799567cffdcc75e9fb2f1c045f05f8317ccce76a3e",
"sha256:e3680884189e2b7c3549f6d304376e64711fd15118e4b1ae81940cb6b1eaa267", "sha256:e4f5100e66ec307cce8b52fc03e379b5d1596e9cb8d8b19dfeeccc1e68d86c96",
"sha256:e54437651e1440ee53a95e6ceb6ee440b67a3d348c76f45f4f48de1a5ecab019", "sha256:e72c0749b06113f50bcb80332364c6be83a9582d6e3db3fe0b280f996dc2ef22",
"sha256:e90a89e52deb56b928e761bb5b5f65f13f669bfd96ed5962975debea09776a23", "sha256:e937790f3c2c18a1cbc7da101550a84319eb48023a715914477d2e7faeaba570",
"sha256:e9ad4ce044e3236f0814c906ccce8647046cc557539661e35211faadf76f283b", "sha256:f1f1bbc23ba1312bd8959896f46f667753b90216852d2a8cfa2d07e0cb234144",
"sha256:ea03f1da0d722fe3c2278a05957e6550175571a4894fbf9d178ceef4a3783d2b", "sha256:f33839aa091f7eef4e9078f5b7ab1b8ea4b1d8a50aeaef9fdb3611bba80869ec",
"sha256:efbec204fa9f877641747f2d9612b2b656071390d7a7ef07a9dbf0ecf9c7195c", "sha256:fa9ecaf2dcef8941fb5719e16322345d730f4c40599bbf47c9753de40eb03882",
"sha256:fb13823a74b3a9204a8ed76d3d6d5ec12e64cc5bc44914eb9ff1cdac04facd43" "sha256:fc21c4a05226fd39715f66c1c28214862474db50df9f08fd1aa2f96698887bc3"
], ],
"markers": "python_version >= '3.9'", "markers": "python_version >= '3.9'",
"version": "==4.59.1" "version": "==4.59.2"
}, },
"h3": { "h3": {
"hashes": [ "hashes": [
@@ -898,7 +898,7 @@
"sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3",
"sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"
], ],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.9.0.post0" "version": "==2.9.0.post0"
}, },
"requests": { "requests": {
@@ -915,9 +915,17 @@
"sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274",
"sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81" "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"
], ],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.17.0" "version": "==1.17.0"
}, },
"suntime": {
"hashes": [
"sha256:33ac6ec2a3e14758cc690f7573f689d19c3131a6c9753f1bb54460bd70372ca4",
"sha256:4834f7907ad13dbb369904cb5f4376edc0b06c6e8a1cfc0aac1268f64d0ecdcf"
],
"index": "pypi",
"version": "==1.3.2"
},
"timezonefinder": { "timezonefinder": {
"hashes": [ "hashes": [
"sha256:00a1b2e681c182ed492280ab0da6343b5a064ad6a1f8ad5a69f94465ccb74ec6", "sha256:00a1b2e681c182ed492280ab0da6343b5a064ad6a1f8ad5a69f94465ccb74ec6",

View File

@@ -1,21 +1,20 @@
import ephem import ephem
import requests
import datetime import datetime
import requests
from suntime import Sun, SunTimeException
from datetime import datetime
from dateutil import tz
from timezonefinder import TimezoneFinder from timezonefinder import TimezoneFinder
def get_sun_and_moon_phase(lat, long, date_str): def get_sun_and_moon_phase(lat, long, date_str):
curr_date = datetime.strptime(date_str, "%Y%m%d")
date_str = f"{date_str[:4]}/{date_str[4:6]}/{date_str[6:8]}" date_str = f"{date_str[:4]}/{date_str[4:6]}/{date_str[6:8]}"
observer = ephem.Observer() my_tz = get_timezone(lat, long)
observer.lat = str(lat) sun = Sun(lat, long)
observer.lon = str(long) sunrise_time = sun.get_sunrise_time(curr_date, tz.gettz(my_tz)).strftime("%H:%M:%S")
observer.date = date_str sunset_time = sun.get_sunset_time(curr_date, tz.gettz(my_tz)).strftime("%H:%M:%S")
sun = ephem.Sun()
sun.compute(observer)
sunrise_time = int(observer.next_rising(sun).datetime().strftime('%Y%m%d'))
sunset_time = int(observer.next_setting(sun).datetime().strftime('%Y%m%d'))
date = ephem.Date(date_str) date = ephem.Date(date_str)
moon = ephem.Moon() moon = ephem.Moon()

View File

@@ -23,10 +23,15 @@ class Database:
else: else:
return None return None
def selectall(self, query, values): def selectall(self, query, values = None):
# Query the database for the specified index # Query the database for the specified index
cursor = self.db.cursor() cursor = self.db.cursor()
cursor.execute(query, values)
if values is None:
cursor.execute(query)
else:
cursor.execute(query, values)
result = cursor.fetchall() result = cursor.fetchall()
if result: if result:
return result return result

View File

@@ -0,0 +1,401 @@
from data.db_connect import Database
# Game, park and weather context for the game being turned into a sample.
_SELECT_UPCOMING_GAME = """
    SELECT
        games.id,
        games.game_date, games.game_number,
        games.day_of_week, games.day_night,
        games.home_plate_ump_id,
        games.b1_ump_id, games.b2_ump_id, games.b3_ump_id,
        games.lf_ump_id, games.rf_ump_id,
        parks.park_id,
        parks.lf_fence_distance, parks.lf_fence_height,
        parks.ct_fence_distance, parks.ct_fence_height,
        parks.rf_fence_distance, parks.rf_fence_height,
        parks.has_roof, parks.elevation,
        parks.latitude, parks.longitude,
        weather.temperature, weather.humidity, weather.dew_point,
        weather.apparent_temperature, weather.air_pressure,
        weather.precipitation, weather.rain, weather.snowfall,
        weather.cloud_cover,
        weather.wind_speed, weather.wind_direction, weather.wind_gusts,
        weather.sun_rise, weather.sun_set, weather.moon_phase
    FROM
        games
        LEFT JOIN parks ON parks.park_id = games.park_id
        LEFT JOIN weather ON weather.game_id = games.id
    WHERE
        games.game_date = ? AND games.game_number = ? AND games.park_id = ?
"""

# One row per team taking part in a game: result flags first, then the lineup.
_SELECT_TEAMS = """
    SELECT
        win, team, home,
        game_num, manager_id, starting_pitcher_id,
        starting_1_id, starting_1_position,
        starting_2_id, starting_2_position,
        starting_3_id, starting_3_position,
        starting_4_id, starting_4_position,
        starting_5_id, starting_5_position,
        starting_6_id, starting_6_position,
        starting_7_id, starting_7_position,
        starting_8_id, starting_8_position,
        starting_9_id, starting_9_position
    FROM
        team_game
    WHERE
        game = ?
"""

# Columns read for every earlier game of a team, in result-row order.  Kept as
# a tuple so the win-flag index and the zero-padding width are derived from the
# query itself instead of being hand-counted.
_PREVIOUS_GAME_COLUMNS = (
    "games.id",
    "games.game_date", "games.game_number",
    "games.day_of_week", "games.day_night",
    "games.length_in_outs", "games.completion_info",
    "games.forfeit", "games.protest",
    "games.attendence", "games.length_in_minutes",
    "games.home_plate_ump_id",
    "games.b1_ump_id", "games.b2_ump_id", "games.b3_ump_id",
    "games.lf_ump_id", "games.rf_ump_id",
    "team_game.game_num", "team_game.score",
    "team_game.line_score", "team_game.win",
    "team_game.hits", "team_game.doubles", "team_game.triples",
    "team_game.homeruns", "team_game.rbis",
    "team_game.sacrifice_hits", "team_game.sacrifice_flies",
    "team_game.hit_by_pitch", "team_game.walks", "team_game.intentional_walks",
    "team_game.strikeouts", "team_game.stolen_bases",
    "team_game.caught_stealing", "team_game.grounded_double",
    "team_game.interference", "team_game.left_on_base",
    "team_game.pitchers_used",
    "team_game.individual_earned_runs", "team_game.earned_runs",
    "team_game.wild_pitches", "team_game.balks",
    "team_game.putouts", "team_game.assists",
    "team_game.errors", "team_game.passed",
    "team_game.double_play", "team_game.triple_play",
    "team_game.manager_id", "team_game.starting_pitcher_id",
    *(
        f"team_game.starting_{slot}_{field}"
        for slot in range(1, 10)
        for field in ("id", "position")
    ),
    "parks.park_id",
    "weather.temperature", "weather.humidity", "weather.dew_point",
    "weather.apparent_temperature", "weather.air_pressure",
    "weather.precipitation", "weather.rain", "weather.snowfall",
    "weather.cloud_cover",
    "weather.wind_speed", "weather.wind_direction", "weather.wind_gusts",
    "weather.sun_rise", "weather.sun_set", "weather.moon_phase",
)

# Position of the win flag inside a previous-game row.
_PREVIOUS_GAME_WIN_INDEX = _PREVIOUS_GAME_COLUMNS.index("team_game.win")

# Win streak + loss streak + every selected column except the leading games.id.
_PREVIOUS_GAME_FEATURE_COUNT = 2 + len(_PREVIOUS_GAME_COLUMNS) - 1

_PREVIOUS_GAME_SELECT_LIST = ",\n        ".join(_PREVIOUS_GAME_COLUMNS)

_SELECT_PREVIOUS_GAMES = f"""
    SELECT
        {_PREVIOUS_GAME_SELECT_LIST}
    FROM
        games
        LEFT JOIN parks ON parks.park_id = games.park_id
        LEFT JOIN team_game ON team_game.game = games.id
        LEFT JOIN weather ON weather.game_id = games.id
    WHERE
        games.game_date > ? AND
        games.game_date < ? AND
        team_game.team = ?
    ORDER BY team_game.game_num ASC
"""


def _trailing_streak(games, win_flag):
    """Count the most recent consecutive games whose win column equals win_flag."""
    streak = 0
    for game in reversed(games):
        if game[_PREVIOUS_GAME_WIN_INDEX] != win_flag:
            break
        streak += 1
    return streak


def _previous_game_features(database, team, season_start, game_date):
    """Return streak counters plus the latest earlier game's stats for one team."""
    prev_games = database.selectall(
        _SELECT_PREVIOUS_GAMES, [season_start, game_date, team]
    )
    if not prev_games:
        # No earlier game this season: pad with zeros of the same width as a
        # populated vector so every sample has an identical length.
        return [0] * _PREVIOUS_GAME_FEATURE_COUNT
    return [
        _trailing_streak(prev_games, 1),
        _trailing_streak(prev_games, 0),
        # Rows are ordered by game_num ascending, so the last one is the most
        # recent; its leading games.id is dropped.
        *prev_games[-1][1:],
    ]


def pull_training_data(database: Database, game_date: str, game_number: int, park_id: str):
    """Build one training sample (label and feature list) for a single game.

    The feature list is the concatenation, in this order, of:

    1. The game/park/weather row for the requested game, without ``games.id``
       (date, game number, day of week, day/night, umpire ids, park id and
       fence dimensions, roof, elevation, latitude/longitude, weather values,
       sun rise/set and moon phase).
    2. For each of the two ``team_game`` rows of that game, in the order the
       database returns them:

       a. the team's own entry without the ``win`` flag (team, home flag,
          game_num, manager_id, starting_pitcher_id and the nine starting
          player id/position pairs);
       b. values derived from that team's earlier games in the same calendar
          year: current win streak, current loss streak, then every column of
          its most recent earlier game (game info, box-score statistics,
          lineup, park id and weather) without ``games.id``.  When the team
          has no earlier game the section is filled with zeros of the same
          width.

    Args:
        database: Connection object providing ``select`` and ``selectall``.
        game_date: Game date formatted as ``YYYYmmDD``.
        game_number: Game number on that date, as stored in ``games``.
        park_id: Identifier of the park where the game is played.

    Returns:
        ``(training_result, training_data)`` where ``training_result`` is
        ``True`` when the home team won and ``False`` otherwise, and
        ``training_data`` is the feature list described above.  ``None`` is
        returned when the game is not found or does not have exactly two
        ``team_game`` rows.
    """
    curr_game = database.select(_SELECT_UPCOMING_GAME, [game_date, game_number, park_id])
    if curr_game is None:
        print(f"Failed to get game data for date: {game_date}, number: {game_number}, park: {park_id}")
        return None

    # selectall yields None instead of an empty list when nothing matches, so
    # the row count has to be computed defensively.
    team_rows = database.selectall(_SELECT_TEAMS, [curr_game[0]])
    team_count = len(team_rows) if team_rows else 0
    if team_count != 2:
        print(f"Got the wrong number of games {team_count}")
        return None

    # Row order is not guaranteed, so look for the home row (home == 1) in
    # either position and read its win flag.
    training_result = any(row[0] == 1 and row[2] == 1 for row in team_rows)

    first_of_the_year = f"{game_date[:4]}0101"
    training_data = [*curr_game[1:]]
    for team_row in team_rows:
        training_data.extend(team_row[1:])
        training_data.extend(
            _previous_game_features(database, team_row[1], first_of_the_year, game_date)
        )
    return (training_result, training_data)

View File

Can't render this file because it is too large.

View File

Can't render this file because it is too large.

View File

Can't render this file because it is too large.

View File

@@ -2,7 +2,7 @@ CREATE TABLE IF NOT EXISTS parks (
park_id CHAR(5) NOT NULL, park_id CHAR(5) NOT NULL,
park_name VARCHAR, park_name VARCHAR,
lf_fence_distance SMALLINT UNSIGNED, lf_fence_distance SMALLINT UNSIGNED,
lf_fence_height, SMALLINT UNSIGNED, lf_fence_height SMALLINT UNSIGNED,
ct_fence_distance SMALLINT UNSIGNED, ct_fence_distance SMALLINT UNSIGNED,
ct_fence_height SMALLINT UNSIGNED, ct_fence_height SMALLINT UNSIGNED,
rf_fence_distance SMALLINT UNSIGNED, rf_fence_distance SMALLINT UNSIGNED,
@@ -59,6 +59,8 @@ CREATE TABLE IF NOT EXISTS team_game (
game_num TINYINT UNSIGNED, game_num TINYINT UNSIGNED,
score TINYINT UNSIGNED, score TINYINT UNSIGNED,
line_score CHAR(9), line_score CHAR(9),
win BIT,
home BIT,
at_bats TINYINT UNSIGNED, at_bats TINYINT UNSIGNED,
hits TINYINT UNSIGNED, hits TINYINT UNSIGNED,

2430
data/stats/gl2024.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -11,33 +11,17 @@ class Importer:
def __init__(self, database: Database): def __init__(self, database: Database):
self.database = database self.database = database
def parse_all_data(self, source_dir, dest_dir): def parse_all_data(self, source_dir):
# Ensure the destination directory exists
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)
# List all files in the source and destination directories # List all files in the source and destination directories
source_files = set(os.listdir(source_dir)) source_files = set(os.listdir(source_dir))
for filename in os.listdir(dest_dir):
if filename.endswith('.csv'):
source_files.discard(filename)
dest_files = set(os.listdir(dest_dir))
# Find files that are in the source but not in the destination
missing_files = source_files - dest_files
# Copy any missing CSV files from the source directory to the destination directory # Copy any missing CSV files from the source directory to the destination directory
for filename in missing_files: for filename in source_files:
src_file = os.path.join(source_dir, filename) src_file = os.path.join(source_dir, filename)
dest_file = os.path.join(dest_dir, filename) dest_file = os.path.join(dest_dir, filename)
if self.parse_one_file(f"{source_dir}/{filename}"): if not self.parse_one_file(f"{source_dir}/{filename}"):
try: print(f"Failed to parse {source_dir}/{filename}")
shutil.copy(src_file, dest_file)
print(f"Copied {filename} to {dest_dir}")
except Exception as e:
print(f"Failed to copy {filename}: {e}")
def parse_one_file(self, filepath): def parse_one_file(self, filepath):
bb_dict = {} bb_dict = {}
@@ -61,6 +45,7 @@ class Importer:
time.sleep(60*60*24) time.sleep(60*60*24)
if not self.populate_database_with_stats(game): if not self.populate_database_with_stats(game):
print(f"Failed to parse and populate {game}")
return False return False
return True return True
@@ -107,7 +92,7 @@ class Importer:
INSERT INTO team_game INSERT INTO team_game
( (
game, team, game_num, game, team, game_num,
score, line_score, at_bats, score, line_score, win, home, at_bats,
hits, doubles, triples, hits, doubles, triples,
homeruns, rbis, sacrifice_hits, homeruns, rbis, sacrifice_hits,
sacrifice_flies, hit_by_pitch, walks, sacrifice_flies, hit_by_pitch, walks,
@@ -132,7 +117,7 @@ class Importer:
VALUES VALUES
( (
?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?,
@@ -205,9 +190,10 @@ class Importer:
game_id = self.database.insert(insert_game, game_data) game_id = self.database.insert(insert_game, game_data)
visiting_win_loss = int(game_stats["visiting-score"]) > int(game_stats["home-score"])
visiting_team_data = [ visiting_team_data = [
game_id, game_stats["visiting-team"], game_stats["visiting-game-num"], game_id, game_stats["visiting-team"], game_stats["visiting-game-num"],
game_stats["visiting-score"], game_stats["visiting-line-scores"], game_stats["visiting-at-bats"], game_stats["visiting-score"], game_stats["visiting-line-scores"], visiting_win_loss, 0, game_stats["visiting-at-bats"],
game_stats["visiting-hits"], game_stats["visiting-doubles"], game_stats["visiting-triples"], game_stats["visiting-hits"], game_stats["visiting-doubles"], game_stats["visiting-triples"],
game_stats["visiting-homeruns"], game_stats["visiting-rbi"], game_stats["visiting-sacrifice-hits"], game_stats["visiting-homeruns"], game_stats["visiting-rbi"], game_stats["visiting-sacrifice-hits"],
game_stats["visiting-sacrifice-flies"], game_stats["visiting-hit-by-pitch"], game_stats["visiting-walks"], game_stats["visiting-sacrifice-flies"], game_stats["visiting-hit-by-pitch"], game_stats["visiting-walks"],
@@ -230,9 +216,10 @@ class Importer:
game_stats["visiting-starting-9-position"] game_stats["visiting-starting-9-position"]
] ]
home_win_loss = int(game_stats["home-score"]) > int(game_stats["visiting-score"])
home_team_data = [ home_team_data = [
game_id, game_stats["home-team"], game_stats["home-game-num"], game_id, game_stats["home-team"], game_stats["home-game-num"],
game_stats["home-score"], game_stats["home-line-scores"], game_stats["home-at-bats"], game_stats["home-score"], game_stats["home-line-scores"], home_win_loss, 1, game_stats["home-at-bats"],
game_stats["home-hits"], game_stats["home-doubles"], game_stats["home-triples"], game_stats["home-hits"], game_stats["home-doubles"], game_stats["home-triples"],
game_stats["home-homeruns"], game_stats["home-rbi"], game_stats["home-sacrifice-hits"], game_stats["home-homeruns"], game_stats["home-rbi"], game_stats["home-sacrifice-hits"],
game_stats["home-sacrifice-flies"], game_stats["home-hit-by-pitch"], game_stats["home-walks"], game_stats["home-sacrifice-flies"], game_stats["home-hit-by-pitch"], game_stats["home-walks"],

20
main.py
View File

@@ -1,8 +1,10 @@
import numpy as np # helps with the math import numpy as np # helps with the math
import matplotlib.pyplot as plt # to plot error during training import matplotlib.pyplot as plt # to plot error during training
from data.get_data import pull_training_data
from data.db_connect import Database from data.db_connect import Database
from data.stats_importer import Importer
from data.build_weather import get_weather, get_sun_and_moon_phase from data.build_weather import get_weather, get_sun_and_moon_phase
from data.stats_importer import Importer
# input data # input data
inputs = np.array([[0, 0, 1, 0], inputs = np.array([[0, 0, 1, 0],
@@ -15,19 +17,27 @@ inputs = np.array([[0, 0, 1, 0],
outputs = np.array([[0], [0], [0], [1], [1], [1]]) outputs = np.array([[0], [0], [0], [1], [1], [1]])
if __name__ == '__main__': if __name__ == '__main__':
db_file = "./database/baseball.db"
db_conn = Database(db_file)
query = "SELECT game_date, game_number, park_id FROM games ORDER BY game_date"
all_games = db_conn.selectall(query)
for game in all_games:
game_result, training_data = pull_training_data(db_conn, str(game[0]), game[1], game[2])
"""
build_db_path = "./data/sql/build_db.sql" build_db_path = "./data/sql/build_db.sql"
fill_parks_path = "./data/sql/prefill_parks.sql" fill_parks_path = "./data/sql/prefill_parks.sql"
fill_teams_path = "./data/sql/prefill_teams.sql" fill_teams_path = "./data/sql/prefill_teams.sql"
db_file = "./database/baseball.db"
db_conn = Database(db_file)
db_conn.run_sql_file(build_db_path) db_conn.run_sql_file(build_db_path)
db_conn.run_sql_file(fill_parks_path) db_conn.run_sql_file(fill_parks_path)
db_conn.run_sql_file(fill_teams_path) db_conn.run_sql_file(fill_teams_path)
imp = Importer(db_conn) imp = Importer(db_conn)
imp.parse_all_data("./data/stats/to_import", "./data/stats/imported/") imp.parse_all_data("./data/stats/", "./data/stats/imported/")
"""
""" """
else: else: