import wget
import traceback
from urllib.error import HTTPError
from os import path, mkdir, remove
from datetime import datetime, timedelta
import shutil
import os
# If set to True, it will re-download and overwrite existing files
OVERWRITE = False
# strptime format the start and end dates must be typed in (e.g. 31/12/2020)
DATE_INPUT_FORMAT = '%d/%m/%Y'
# Base URL that each CSV file name is appended to. It has to be a single
# unbroken string literal: a line break inside the quotes is a syntax error.
URL = 'https://promo.betfair.com/betfairsp/prices/'
# Root folder everything is stored under - replace with a real path before running
SP_DATA_PATH = r'YOURPATHHERE!!!'
GREYHOUND_FOLDER = 'Greyhound'
HORSERACING_FOLDER = 'Horseracing'
# Maps each remote file-name prefix to the folder (relative to SP_DATA_PATH)
# that files with that prefix are stored in.
PREFIX_LOCATION_MAP = {
    'dwbfgreyhoundplace': path.join(GREYHOUND_FOLDER, 'Greyhound_Place'),
    'dwbfgreyhoundwin': path.join(GREYHOUND_FOLDER, 'Greyhound_Win'),
    'dwbfpricesireplace': path.join(HORSERACING_FOLDER, 'IREHorseRacing_Place'),
    'dwbfpricesirewin': path.join(HORSERACING_FOLDER, 'IREHorseRacing_Win'),
    'dwbfpricesukwin': path.join(HORSERACING_FOLDER, 'UKHorseRacing_Win'),
    'dwbfpricesukplace': path.join(HORSERACING_FOLDER, 'UKHorseRacing_Place'),
}
# Name of the sub-folder, inside each target folder, that holds the individual
# daily files until they are merged.
TEMP_FOLDER = 'Temp'
def prompt_for_date(prompt, date_format):
    """Ask the user for a date until the answer can be parsed.

    Args:
        prompt: Text shown to the user by ``input``.
        date_format: ``strptime`` format the answer has to match.

    Returns:
        The parsed ``datetime``.
    """
    while True:
        date_input = input(prompt).strip()
        try:
            return datetime.strptime(date_input, date_format)
        except ValueError:
            print('Could not parse the date.')


def concatenate_csv_files(source_paths, destination_path):
    """Merge several CSV files into one, keeping a single header row.

    The files are appended in the order given. Paths that do not exist and
    files that are empty are skipped. The first line of the first non-empty
    file is kept as the header; the first line of every later file is dropped.

    Args:
        source_paths: Paths of the CSV files to merge, in output order.
        destination_path: Path of the merged file; overwritten if present.
    """
    header_written = False
    with open(destination_path, 'w') as destination:
        for source_path in source_paths:
            if not path.exists(source_path):
                continue
            with open(source_path) as source:
                lines = source.readlines()
            if not lines:
                # An empty file must not count as "header already written".
                continue
            if not lines[-1].endswith('\n'):
                # Keep the last row from being glued to the next file's rows.
                lines[-1] += '\n'
            # Column names should be added just once
            destination.writelines(lines[1:] if header_written else lines)
            header_written = True


if __name__ == '__main__':
    # Check if target folders exist and make sure temp folders are emptied
    folders = list(PREFIX_LOCATION_MAP.values())
    temp_folders = [path.join(x, TEMP_FOLDER) for x in folders]
    for location in [GREYHOUND_FOLDER, HORSERACING_FOLDER] + folders + temp_folders:
        folder_path = path.join(SP_DATA_PATH, location)
        if location in temp_folders:
            try:
                shutil.rmtree(folder_path)
            except FileNotFoundError:
                pass
        # makedirs also creates missing parents (including SP_DATA_PATH) and
        # does not fail when the folder is already there.
        os.makedirs(folder_path, exist_ok=True)

    # Get date input
    dt_start = prompt_for_date(
        'Please enter the start date of the Files you want to download: ',
        DATE_INPUT_FORMAT)
    dt_end = prompt_for_date(
        'Please enter the end date of the Files you want to download: ',
        DATE_INPUT_FORMAT)
    # An end date before the start date would yield an empty range and
    # empty output files, so ask again instead.
    while dt_end < dt_start:
        print('The end date must not be before the start date.')
        dt_end = prompt_for_date(
            'Please enter the end date of the Files you want to download: ',
            DATE_INPUT_FORMAT)

    # Generate a range of dates (both ends inclusive)
    dates = [dt_start + timedelta(days=x) for x in range((dt_end - dt_start).days + 1)]

    # Download files to temp folders
    for date in dates:
        date_str = date.strftime('%d%m%Y')
        for prefix, location in PREFIX_LOCATION_MAP.items():
            filename = '{}{}.csv'.format(prefix, date_str)
            print(filename)
            destination = path.join(SP_DATA_PATH, location, TEMP_FOLDER, filename)
            try:
                if path.exists(destination):
                    if not OVERWRITE:
                        continue
                    remove(destination)
                wget.download(URL + filename, destination)
            except HTTPError as http_error:
                print('HTTP Error:', http_error.code)
            except Exception:
                # One failed file must not stop the run, but Ctrl+C still
                # works because KeyboardInterrupt is not an Exception.
                print(traceback.format_exc())

    # Concatenate files
    for prefix, location in PREFIX_LOCATION_MAP.items():
        filename = '{}{}-{}.csv'.format(prefix,
                                        dt_start.strftime('%d%m%Y'),
                                        dt_end.strftime('%d%m%Y'))
        file_path = path.join(SP_DATA_PATH, location, filename)
        folder_path = path.join(SP_DATA_PATH, location, TEMP_FOLDER)
        # Build the daily file names from the date range so the rows are
        # merged chronologically; a directory listing has no guaranteed order.
        daily_files = [
            path.join(folder_path, '{}{}.csv'.format(prefix, day.strftime('%d%m%Y')))
            for day in dates
        ]
        concatenate_csv_files(daily_files, file_path)
        shutil.rmtree(folder_path)