# Assuming default timings: trip_times for this run.
# Uses not the JSONs, but the stops_all.csv and routes.csv generated by the reports script.
# Days of week: assume all days.
import pandas as pd
from collections import OrderedDict
import json, os, time, datetime
# and import custom functions
import gtfs_common as gc
# --- Run setup: overall timer, dev flags, paths, configuration ---
t1 = time.time()

# Development mode: only the first DEVLimit routes are processed (routesDF is
# truncated further below).
DEV = True
DEVLimit = 50

# Resolve the project root from this file's location so the script works even
# when called from another working directory; __file__ is undefined in
# interactive / notebook use, hence the NameError fallback.
try:
    root = os.path.dirname(__file__)
except NameError:
    root = '../payanam/'

reportsFolder = os.path.join(root, 'reports/')
# gtfsFolder = os.path.join(root,'gtfs/')
gtfsFolder = 'gtfs/'  # output folder for the generated GTFS feed
configFile = os.path.join(root, 'config/', 'config.json')
logFolder = os.path.join(root, 'reports/logs')  # fixed: was `logFolder = logFolder = ...`
stopsFile = 'stops_all.csv'
routesFile = 'routes.csv'

# Load run configuration (time defaults, default speed, etc.).
# `with` closes the handle — json.load(open(...)) leaked it.
with open(configFile, 'r') as cf:
    config = json.load(cf)
# Create output folders if they don't exist. logFolder is included too:
# logmessage() appends to logFolder/log.txt and would crash if the
# directory were missing.
for folder in [gtfsFolder, logFolder]:
    os.makedirs(folder, exist_ok=True)

# Default trip start times for every route. '10:00' is the fallback when
# config.json has no timeDefaults.trip_times entry.
# (ah I see what ya did there. you put a default on the default. cheers mate)
default_trip_times = gc.tripTimesProcess( config.get('timeDefaults',{}).get('trip_times','10:00') )
def logmessage( *content ):
    """Print *content* to screen and append it, timestamped, to logFolder/log.txt.

    Accepts any mix of values; each is str()'d and the pieces are joined
    with single spaces.
    """
    timestamp = '{:%Y-%b-%d %H:%M:%S} :'.format(datetime.datetime.now())
    # str(x) handles non-string values (numbers etc.) before joining.
    # from https://stackoverflow.com/a/3590168/4355695
    line = ' '.join(str(x) for x in content)
    print(line)  # echo to screen also
    # Append to the log file; `with` guarantees the handle is closed even if
    # the write raises. newline='\r\n' forces Windows-style line endings.
    # from https://stackoverflow.com/a/2918367/4355695
    with open(os.path.join(logFolder, 'log.txt'), 'a', newline='\r\n', encoding='utf8') as f:
        print(timestamp, line, file=f)
# Load the two source tables produced by the reports script.
# Everything is read as strings, with blanks instead of NaNs, so that
# equality comparisons against '' work downstream.
stopsPath = os.path.join(reportsFolder, stopsFile)
stops_src = pd.read_csv(stopsPath, dtype=str).fillna('')
stops_src.head()

routesPath = os.path.join(reportsFolder, routesFile)
routes_src = pd.read_csv(routesPath, dtype=str).fillna('')
routes_src.head()
# agency.txt content: a single agency record.
agencyDF = pd.DataFrame([{
    'agency_id': 'TSRTC_HYD',
    'agency_name': 'Telangana State Road Transport Corporation (Hyderabad local bus)',
    'agency_url': 'http://tsrtconline.in',
    'agency_timezone': 'Asia/Kolkata',
}])
agencyDF

# calendar.txt content: weekday (WK), Saturday (SA), Sunday (SU) and
# every-day (ALL) services, as '0'/'1' flag columns per day.
calendar_cols = OrderedDict()
calendar_cols['service_id'] = ['WK', 'SA', 'SU', 'ALL']
for day in ['monday', 'tuesday', 'wednesday', 'thursday', 'friday']:
    calendar_cols[day] = ['1', '0', '0', '1']
calendar_cols['saturday'] = ['0', '1', '0', '1']
calendar_cols['sunday'] = ['0', '0', '1', '1']
calendarDF = pd.DataFrame(data=calendar_cols)
# Validity window applied to every service.
calendarDF['start_date'] = 20190101
calendarDF['end_date'] = 20221231
calendarDF
# ID scheme decided:
#   route_id : {folder}:{jsonFile minus the .json}
#   trip_id  : {route_id}:d{direction}:n for n = 1..num trips
# Mutate the routes table in place to become routes.txt.
# route_id = "<folder>:<jsonFile with its trailing '.json' stripped>".
routes_src['route_id'] = routes_src.apply(
    lambda row: "{}:{}".format(row.folder, row.jsonFile[:-5]), axis=1)
routes_src['route_type'] = '3'  # 3 = bus, per GTFS route_type codes

columnRenames = {'routeName': 'route_short_name',
                 'routeLongName': 'route_long_name',
                 'busType': 'route_categories'}
routesDF = routes_src[['route_id', 'routeName', 'routeLongName', 'busType', 'route_type']]\
    .rename(index=str, columns=columnRenames)
routesDF

# DEVine intervention
if DEV:
    routesDF = routesDF.iloc[:DEVLimit]

t2 = time.time()
logmessage("Starting + agency + calendar + routes took {} seconds.".format(round(t2-t1,2)))
# Unique stops table: keep only rows where both lat and lon are mapped,
# de-duplicate on (name, lat, lon), then sort by zap and latitude.
# NOTE(review): zap appears to be a normalized form of the stop name used
# for grouping/prefixing — confirm against the reports script.
mappedMask = (stops_src.stop_lat != '') & (stops_src.stop_lon != '')
stops_uniqueDF = stops_src.loc[mappedMask, ['stop_name', 'stop_lat', 'stop_lon', 'zap']]\
    .drop_duplicates(['stop_name', 'stop_lat', 'stop_lon'])\
    .sort_values(['zap', 'stop_lat']).copy().reset_index(drop=True)
logmessage(len(stops_uniqueDF))
stops_uniqueDF.head()

stops_uniqueDF['stop_id'] = ''  # filled in by the assignment loop below
# Assign a stop_id to every unique stop: first 6 chars of its zap value,
# upper-cased, plus the smallest numeric suffix not already taken.
# A set gives O(1) membership tests; the original re-built the whole
# stop_id column as a list on every probe, making this loop quadratic.
usedIDs = set()
for N in range(len(stops_uniqueDF)):
    zap = stops_uniqueDF.at[N, 'zap']
    prefix = zap[:6].upper()
    suffix = 0
    while True:
        suffix += 1
        stop_id = '{}{}'.format(prefix, suffix)
        if stop_id not in usedIDs:
            break
    usedIDs.add(stop_id)
    stops_uniqueDF.at[N, 'stop_id'] = stop_id
# (dropped the bare `stops_uniqueDF.sample(10)` notebook residue: it raises
#  ValueError whenever there are fewer than 10 stops, e.g. small DEV runs)

stopsDF = stops_uniqueDF[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']]
t3 = time.time()
logmessage("Stops processing took {} seconds.".format(round(t3-t2,2)))
'#'*70
# make route_id in the stops_src DF too (same formula as for routesDF above)
stops_src['route_id'] = stops_src.apply(lambda x: "{}:{}".format(x.folder, x.jsonFile[:-5]), axis=1)

# Precompute (name, lat, lon) -> stop_id once. The original scanned the whole
# stops_uniqueDF for every stop of every trip, which is quadratic; the key is
# unique here thanks to the drop_duplicates on the same three columns.
stopIDLookup = {(r.stop_name, r.stop_lat, r.stop_lon): r.stop_id
                for r in stops_uniqueDF.itertuples(index=False)}

tripsCollector = []       # rows for trips.txt
stopTimesCollector = []   # rows for stop_times.txt
oneDirList = []           # spinoff data : routes that have only one direction

for rN, route_id in enumerate(routesDF.route_id):
    logmessage(rN, route_id)
    for direction_id in ['0','1']:
        # stop sequence for this route+direction; mapped (lat/lon present) stops only
        this_sequence = stops_src[(stops_src.route_id == route_id) & \
            (stops_src.direction_id == direction_id) & \
            (stops_src.stop_lat != '') & (stops_src.stop_lon != '')].copy().reset_index(drop=True)
        if not len(this_sequence):
            logmessage("Route {}: No data for direction {}".format(route_id,direction_id))
            oneDirList.append(route_id)
            continue
        tripLen = len(this_sequence) # this is also of how many stops are actually mapped
        this_trip_times = default_trip_times.copy() # for now, just the defaults
        this_speed = config.get('defaultSpeed',15)
        # duration in hh:mm:ss and total distance for one run of this direction
        this_duration, this_distance = gc.computeDuration(this_sequence, this_speed )
        # expand the start times into (start, end) couples, one per trip
        tripTimesArray = gc.timeEngineTrips(this_trip_times,this_duration)
        logmessage("direction {}: distance: {} km. duration: {}".format(direction_id, this_distance, this_duration))
        for N, couple in enumerate(tripTimesArray):
            tripRow = OrderedDict({'route_id':route_id})
            tripRow['service_id'] = 'ALL' # assume only one for now
            trip_id = "{}:d{}:{}".format(route_id,direction_id,(N+1))
            tripRow['trip_id'] = trip_id
            tripRow['direction_id'] = direction_id
            # extra (non-GTFS) columns, handy for inspection:
            tripRow['num_stops'] = tripLen
            tripRow['distance'] = this_distance
            tripRow['duration'] = this_duration
            tripRow['start_time'] = couple[0]
            tripsCollector.append(tripRow.copy())
            # stop_times rows: explicit times only at the first and last stop,
            # blank in between (interpolated by consumers).
            for seqN, seqRow in this_sequence.iterrows():
                stRow = OrderedDict()
                stRow['trip_id'] = trip_id
                if seqN == 0:
                    stRow['arrival_time'] = stRow['departure_time'] = couple[0]
                elif seqN == (tripLen - 1):
                    stRow['arrival_time'] = stRow['departure_time'] = couple[1]
                else:
                    stRow['arrival_time'] = stRow['departure_time'] = ''
                # stop_id : found by matching name, lat, lon via the prebuilt dict
                stRow['stop_id'] = stopIDLookup[(seqRow.stop_name, seqRow.stop_lat, seqRow.stop_lon)]
                stRow['stop_sequence'] = seqN + 1
                stRow['timepoint'] = '0'
                stopTimesCollector.append(stRow.copy())
# Assemble the final trips and stop_times tables from the collected rows.
tripsDF = pd.DataFrame(tripsCollector)
tripsDF.head(10)
stopTimesDF = pd.DataFrame(stopTimesCollector)
stopTimesDF.head(10)
t4 = time.time()
logmessage("trips and stop_times process took {} seconds.".format(round(t4-t3,2)))
'#'*70

# Write out all the GTFS files. Paths consistently use
# os.path.join(gtfsFolder, name); the original passed gtfsFolder+'agency.txt'
# as a single join argument, which only worked because gtfsFolder happens to
# end in a slash.
agencyDF.to_csv(os.path.join(gtfsFolder, 'agency.txt'), index=False)
logmessage('Created agency.txt')
calendarDF.to_csv(os.path.join(gtfsFolder, 'calendar.txt'), index=False)
logmessage('Created calendar.txt')
stopsDF.to_csv(os.path.join(gtfsFolder, 'stops.txt'), index=False)
logmessage('Created stops.txt, {} entries.'.format(len(stopsDF)))
routesDF.to_csv(os.path.join(gtfsFolder, 'routes.txt'), index=False)
logmessage('Created routes.txt, {} entries.'.format(len(routesDF)))
tripsDF.to_csv(os.path.join(gtfsFolder, 'trips.txt'), index=False)
logmessage('Created trips.txt, {} entries.'.format(len(tripsDF)))
stopTimesDF.to_csv(os.path.join(gtfsFolder, 'stop_times.txt'), index=False)
logmessage('Created stop_times.txt, {} entries.'.format(len(stopTimesDF)))

t5 = time.time()
logmessage("writing out all GTFS files took {} seconds.".format(round(t5-t4,2)))
logmessage("The whole GTFS creation script took {} seconds.".format(round(t5-t1,2)))