Moving around CSVs and better handling of duplicate data. Pulling all relevant data.

This commit is contained in:
2025-09-15 18:46:37 +00:00
parent 3950e99151
commit 125d133af5
9 changed files with 2450 additions and 33 deletions

View File

@@ -11,33 +11,17 @@ class Importer:
def __init__(self, database: Database):
self.database = database
def parse_all_data(self, source_dir, dest_dir):
# Ensure the destination directory exists
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)
def parse_all_data(self, source_dir):
# List all files in the source and destination directories
source_files = set(os.listdir(source_dir))
for filename in os.listdir(dest_dir):
if filename.endswith('.csv'):
source_files.discard(filename)
dest_files = set(os.listdir(dest_dir))
# Find files that are in the source but not in the destination
missing_files = source_files - dest_files
# Copy any missing CSV files from the source directory to the destination directory
for filename in missing_files:
for filename in source_files:
src_file = os.path.join(source_dir, filename)
dest_file = os.path.join(dest_dir, filename)
if self.parse_one_file(f"{source_dir}/{filename}"):
try:
shutil.copy(src_file, dest_file)
print(f"Copied {filename} to {dest_dir}")
except Exception as e:
print(f"Failed to copy {filename}: {e}")
if not self.parse_one_file(f"{source_dir}/{filename}"):
print(f"Failed to parse {source_dir}/{filename}")
def parse_one_file(self, filepath):
bb_dict = {}
@@ -61,6 +45,7 @@ class Importer:
time.sleep(60*60*24)
if not self.populate_database_with_stats(game):
print(f"Failed to parse and populate {game}")
return False
return True