Making GTFS from the payanam data

Assuming default timings : trip_times for this run

Use... not the jsons, but the stops_all.csv and routes.csv generated by reports script.

Days of week : assume all days

In [1]:
import pandas as pd
from collections import OrderedDict
import json, os, time, datetime

# and import custom functions
import gtfs_common as gc
In [2]:
t1 = time.time()
In [3]:
DEV = True
DEVLimit = 50
In [4]:
try:
    root = os.path.dirname(__file__) # needed for tornado and all other paths, prog should work even if called from other working directory.
except NameError as e:
    root = '../payanam/'

reportsFolder = os.path.join(root,'reports/')
# gtfsFolder = os.path.join(root,'gtfs/')
gtfsFolder = 'gtfs/'
configFile = os.path.join(root,'config/','config.json')
logFolder = logFolder = os.path.join(root,'reports/logs')

stopsFile = 'stops_all.csv'
routesFile = 'routes.csv'
In [5]:
config = json.load(open(configFile, 'r'))
config
Out[5]:
{'databank': 'stops-databank.csv',
 'mapped': 'stops_mapped.csv',
 'datacleaned': 'stopnames-datacleaning-2.1.19.csv',
 'sanityDistance': 7,
 'fuzzyTopN': 10,
 'timeDefaults': {'trip_times': '06:00, 10:00, 14:00, 18:00, 22:00',
  'first_trip_start': '06:00',
  'last_trip_start': '22:00',
  'hr': 4,
  'min': 0,
  'sec': 0},
 'defaultSpeed': 15,
 'STARTLOCATION': [17.390491, 78.484102]}
In [6]:
# create folders if they don't exist
for folder in [gtfsFolder]:
    if not os.path.exists(folder):
        os.makedirs(folder)
In [7]:
default_trip_times = gc.tripTimesProcess( config.get('timeDefaults',{}).get('trip_times','10:00') )
# ah I see what ya did there. you put a default on the default. cheers mate
default_trip_times
Out[7]:
['06:00:00', '10:00:00', '14:00:00', '18:00:00', '22:00:00']

functions

In [8]:
def logmessage( *content ):
    global logFolder
    timestamp = '{:%Y-%b-%d %H:%M:%S} :'.format(datetime.datetime.now())
    # from https://stackoverflow.com/a/26455617/4355695
    line = ' '.join(str(x) for x in list(content))
    # str(x) for x in list(content) : handles numbers in the list, converts them to string before concatenating. 
    # from https://stackoverflow.com/a/3590168/4355695
    print(line) # print to screen also
    f = open(os.path.join(logFolder, 'log.txt'), 'a', newline='\r\n', encoding='utf8') #open in append mode
    print(timestamp, line, file=f)
    # `,file=f` argument at end writes the line, with newline as defined, to the file instead of to screen. 
    # from https://stackoverflow.com/a/2918367/4355695
    f.close()
In [9]:
# load up the df's
stops_src = pd.read_csv(os.path.join(reportsFolder,stopsFile),dtype=str).fillna('')
stops_src.head()
Out[9]:
sr workStatus folder jsonFile depot routeName direction_id stop_sequence stop_name stop_lat stop_lon zap confidence stop_desc
0 0 working CNT 280_564.json CNT 280/564 0 1 Jubilee Bus Station 17.44815 78.49655 jubileebusstation 6
1 1 working CNT 280_564.json CNT 280/564 0 2 Secunderabad 17.43835 78.51008 secunderabad 2
2 2 working CNT 280_564.json CNT 280/564 0 3 Alugadda Bavi 17.43548 78.51421 alugaddabavi 0
3 3 working CNT 280_564.json CNT 280/564 0 4 Mettuguda 17.4354 78.51951 mettuguda 0
4 4 working CNT 280_564.json CNT 280/564 0 5 Tarnaka 17.42696 78.5305 tarnaka 0
In [10]:
routes_src = pd.read_csv(os.path.join(reportsFolder,routesFile),dtype=str).fillna('')
routes_src.head()
Out[10]:
sr workStatus folder jsonFile depot routeName routeLongName busType extra0 extra1 ... mapped0 len1 mapped1 autoMapped avgConfidence mapped%0 mapped%1 mapped%total autoMapped% manuallyMapped%
0 0 working CNT 280_564.json CNT 280/564 JBS to NAGIREDDY PALLY CO Mofussil ... 37.0 40.0 37.0 50.0 0.7 92.5 92.5 92.5 62.5 30.0
1 1 working CNT 20P_ME.json CNT 20P ME Secunderabad to Nampally, CBS ME currently UP journey not touching Ashok Nagar,... ... 23.0 23.0 23.0 27.0 0.8 100.0 100.0 100.0 58.7 41.3
2 2 working CNT 47L_ME.json CNT 47L ME JBS to Film Nagar, MANIKONDA ME ... 27.0 27.0 27.0 32.0 0.7 100.0 100.0 100.0 59.3 40.7
3 3 working CNT 107JD.json CNT 107JD Rethifile to Dilsukhnagar ME ... 21.0 21.0 21.0 11.0 1.6 100.0 100.0 100.0 26.2 73.8
4 4 working CNT 47Y.json CNT 47Y SECUNDERABAD to Yousufguda MANIKONDA CO ... 25.0 29.0 24.0 49.0 0.0 86.2 82.8 84.5 84.5 0.0

5 rows × 23 columns

agency.txt

In [11]:
agencyDF = pd.DataFrame(data={'agency_id':['TSRTC_HYD'],'agency_name':['Telangana State Road Transport Corporation (Hyderabad local bus)'],\
    'agency_url': ['http://tsrtconline.in'], 'agency_timezone':['Asia/Kolkata']}) 
agencyDF
Out[11]:
agency_id agency_name agency_url agency_timezone
0 TSRTC_HYD Telangana State Road Transport Corporation (Hy... http://tsrtconline.in Asia/Kolkata

calendar.txt

In [12]:
calendarDF = pd.DataFrame(data={"service_id":["WK","SA","SU","ALL"],    "monday":['1','0','0','1'], "tuesday":['1','0','0','1'], "wednesday":['1','0','0','1'],    "thursday":['1','0','0','1'], "friday":['1','0','0','1'],    "saturday":['0','1','0','1'], "sunday":['0','0','1','1']})
calendarDF['start_date'] = 20190101
calendarDF['end_date'] = 20221231
calendarDF
Out[12]:
service_id monday tuesday wednesday thursday friday saturday sunday start_date end_date
0 WK 1 1 1 1 1 0 0 20190101 20221231
1 SA 0 0 0 0 0 1 0 20190101 20221231
2 SU 0 0 0 0 0 0 1 20190101 20221231
3 ALL 1 1 1 1 1 1 1 20190101 20221231

routes.txt

decide:
route_id : {folder}:{jsonFile minus the .json}
trip_id : {route_id}:d{direction}:n for 1..n

In [13]:
# mutate the routes file itself to make route.txt
routes_src['route_id'] = routes_src.apply(lambda x: "{}:{}".format(x.folder,x.jsonFile[:-5]), axis=1)
routes_src['route_type'] = '3'
In [14]:
routesDF = routes_src[['route_id','routeName','routeLongName','busType','route_type']].rename(index=str,\
    columns={'routeName':'route_short_name', 'routeLongName':'route_long_name','busType':'route_categories'})
routesDF
Out[14]:
route_id route_short_name route_long_name route_categories route_type
0 CNT:280_564 280/564 JBS to NAGIREDDY PALLY CO Mofussil 3
1 CNT:20P_ME 20P ME Secunderabad to Nampally, CBS ME 3
2 CNT:47L_ME 47L ME JBS to Film Nagar, MANIKONDA ME 3
3 CNT:107JD 107JD Rethifile to Dilsukhnagar ME 3
4 CNT:47Y 47Y SECUNDERABAD to Yousufguda MANIKONDA CO 3
5 CNT:16D 16D Regimental Bazar to Damaiguda, Ambedkar Nagar CO 3
6 CNT:212_568 212/568 Secunderabad to Laxmapur, Thimapur, Kondapur CO Mofussil 3
7 CNT:16X 16X Secunderabad- Prasanth Nagar - Secunderabad CO 3
8 CNT:290U 290U JBS to HAYATHNAGAR MD 3
9 CNT:290_463 290/463 J.B.S to SAIBRUNDAVANAM COLONY Mofussil 3
10 CNT:24B 24B Gurudwara to Balajinagar CO, ME 3
11 CNT:107VR 107VR Rethifile to Dilsukhnagar, LB Nagar ME,MD 3
12 CNT:10YH 10YH Secunderabad to Gayatri Hills CO 3
13 CNT:5M 5M Secunderabad to Mehdipatnam ME 3
14 CNT:219 219 Secunderabad to Patancheru ME, CO 3
15 CNT:24BD 24BD Gurudwara to Damaiguda CO, ME 3
16 CNT:21B 21B Gurudwara to Bhudevi Nagar CO 3
17 CNT:280_564M 280/564M JBS to Madharam Village CO Mofussil 3
18 CNT:16A 16A Regimental Bazar to ECIL Xroad CO 3
19 CNT:44EX 44EX Rathifile Parsigutta Rathifile CO 3
20 CNT:23B 23B Gurudwara to RTC colony, Bhudevi Nagar CO 3
21 CNT:279 279 JBS to Ibrahimpatnam ME, MD 3
22 CNT:107JS 107JS Rethifile to Saroornagar CO 3
23 CNT:16C 16C Regimental Bazar to ECIL Xroad CO 3
24 CNT:568 568 Secunderabad to Laxmapur, Thimapur, Kondapur Mofussil 3
25 CNT:290AF 290AF JBS to Ramoji Film City Mofussil 3
26 CNT:219_ORD_SUB 219 ORD SUB Secunderabad to Patancheru ORD-Sub ORD-Sub 3
27 CNT:2C 2C Rathifile to Barkas ME 3
28 CNT:47L 47L JBS to Film Nagar, MANIKONDA CO, ME 3
29 CNT:23GF 23GF Gurudwara to Greenfields, Kistama Enclave CO 3
... ... ... ... ... ...
1194 BHEL:226 226 3
1195 BHEL:518 518 Ordinary 3
1196 BHEL:218_102 218/102 Ordinary 3
1197 MDCL:649 649 Mehdipatnam to Medchal 3
1198 MDCL:211 211 Secunderabad to Aliabad SC Colony 3
1199 MDCL:296 296 VBIT to Medchal 3
1200 MDCL:295 295 Infotech to Medchal 3
1201 MDCL:8C_229 8C/229 Afzalgunj to Medchal 3
1202 MDCL:229 229 Medchal to Secunderabad MD,ME,SU,CO 3
1203 MDCL:587 587 Medchal to Yadagiri gutta MO 3
1204 MDCL:227 227 Secunderabad to Gandimaisamma MD,ME,SU,CO 3
1205 MDCL:567C 567C Medchal to Narsapur MO 3
1206 MDCL:29Q 29Q Secunderabad to Medchal 3
1207 MDCL:495 495 Secunderabad to Medchal MO 3
1208 MDCL:591 591 Medchal to Balanag MO 3
1209 MDCL:219_229 219/229 Medchal to Patancheru MD,ME,SU 3
1210 MDCL:229R 229R Secunderabad to Medchal 3
1211 MDCL:589 589 Secunderabad to Ramayanpet MO 3
1212 MDCL:212_702 212/702 Medchal to Secunderabad MO 3
1213 MDCL:233 233 Secunderabad to Medchal 3
1214 MDCL:588 588 Medchal to Narsapur MO 3
1215 MDCL:229P 229P Secunderabad to Medchal 3
1216 MDCL:229D 229D Secunderabad to Dabilpura 3
1217 MDCL:235G 235G Secunderabad to Medchal 3
1218 MDCL:272G 272G Secunderabad to Medchal 3
1219 MDCL:7K_229 7K/229 Afzalgunj to Nutankal 3
1220 MDCL:586 586 MO 3
1221 MDCL:590 590 Secunderabad to Veldurthy MO 3
1222 MDCL:90L_229 90L/229 Medchal to LB Nagar MD,ME,SU 3
1223 MDCL:585 585 Medchal to Yadagiri gutta MO 3

1224 rows × 5 columns

In [15]:
# DEVine intervention
if DEV:
    routesDF = routesDF.iloc[:DEVLimit]
In [16]:
t2 = time.time()
logmessage("Starting + agency + calendar + routes took {} seconds.".format(round(t2-t1,2)))
Starting + agency + calendar + routes took 2.07 seconds.

process stops

  • de-dupe by: zap name, stop_lat, stop_lon
  • assign a stop_id to each such triplet
In [17]:
stops_uniqueDF = stops_src[(stops_src.stop_lat!='') & (stops_src.stop_lon!='')]\
    [['stop_name','stop_lat','stop_lon','zap']]\
    .drop_duplicates(['stop_name','stop_lat','stop_lon'])\
    .sort_values(['zap','stop_lat']).copy().reset_index(drop=True)
logmessage(len(stops_uniqueDF))
stops_uniqueDF.head()
5387
Out[17]:
stop_name stop_lat stop_lon zap
0 1st Phase 17.45297 78.56591 1stphase
1 1st Phase/H.B. Colony 17.45352 78.56585 1stphasehbcolony
2 4th Phase 17.47122 78.38798 4thphase
3 6 Number / Amberpet 17.39142 78.5126 6numberamberpet
4 6 Number / Amberpet 17.39157 78.51238 6numberamberpet
In [18]:
stops_uniqueDF['stop_id'] = '' # initiate column with blanks
In [19]:
# to do next : assign stop_id's, store as stops.txt
for N in range(len(stops_uniqueDF) ) :
    zap = stops_uniqueDF.at[N,'zap']
    suffix = 0
    while True:
        suffix +=1
        stop_id = '{}{}'.format(zap[:6].upper(),suffix)
        if stop_id not in stops_uniqueDF['stop_id'].tolist() :
            break
    stops_uniqueDF.at[N,'stop_id'] = stop_id
In [20]:
stops_uniqueDF.sample(10)
Out[20]:
stop_name stop_lat stop_lon zap stop_id
4603 Shoppers Stop South 17.44483 78.46561 shoppersstopsouth SHOPPE3
2382 Kishanguda 17.24841 78.37419 kishanguda KISHAN3
4813 Suthariguda 17.60958 78.48835 suthariguda SUTHAR1
3463 NIN Colony 17.41637 78.59352 nincolony NINCOL1
4803 Survey Of India-Uppal X Road 17.40507 78.55662 surveyofindiauppalxroad SURVEY2
881 Chandupatlaguda 17.44434 78.70103 chandupatlaguda CHANDU3
1877 IDPL Colony Water Tank 17.4817 78.44921 idplcolonywatertank IDPLCO4
4057 Ramoji Film City Gate 17.31164 78.68288 ramojifilmcitygate RAMOJI1
2943 Mangalguda 17.01739 78.35217 mangalguda MANGAL1
3591 Osmania Hospital (OGH) 17.37109 78.47129 osmaniahospitalogh OSMANI2
In [21]:
stopsDF = stops_uniqueDF[['stop_id','stop_name','stop_lat','stop_lon']]
In [22]:
t3 = time.time()
logmessage("Stops processing took {} seconds.".format(round(t3-t2,2)))
Stops processing took 6.57 seconds.
In [23]:
'#'*70
Out[23]:
'######################################################################'

trips.txt and stop_times.txt

  • Timings: just the defaults for now
  • have to check if a route doesn't have reverse direction then don't provision those trips
In [24]:
# make route_id in the stops_src DF too
stops_src['route_id'] = stops_src.apply(lambda x: "{}:{}".format(x.folder,x.jsonFile[:-5]), axis=1)
In [25]:
tripsCollector = []
stopTimesCollector = []
oneDirList = [] # spinoff data : routes that have only one direction

for rN, route_id in enumerate(routesDF.route_id):
    logmessage(rN, route_id)
    for direction_id in ['0','1']:
        # check for presence of entries
        
        this_sequence = stops_src[(stops_src.route_id == route_id) & \
            (stops_src.direction_id == direction_id) & \
            (stops_src.stop_lat != '') & (stops_src.stop_lon != '')].copy().reset_index(drop=True)
        if not len(this_sequence):
            logmessage("Route {}: No data for direction {}".format(route_id,direction_id))
            oneDirList.append(route_id)
            continue
            
        tripLen = len(this_sequence) # this is also of how many stops are actually mapped
        
        this_trip_times = default_trip_times.copy() # for now, just the defaults
        
        this_speed = config.get('defaultSpeed',15)
        
        this_duration, this_distance = gc.computeDuration(this_sequence, this_speed ) # gets duration in hh:mm:ss, and total distance
        
        tripTimesArray = gc.timeEngineTrips(this_trip_times,this_duration)
        
        logmessage("direction {}: distance: {} km. duration: {}".format(direction_id, this_distance, this_duration))
        
        for N, couple in enumerate(tripTimesArray):
            tripRow = OrderedDict({'route_id':route_id})
            tripRow['service_id'] = 'ALL' # assume only one for now
            trip_id = "{}:d{}:{}".format(route_id,direction_id,(N+1))
            tripRow['trip_id'] = trip_id
            tripRow['direction_id'] = direction_id
            # extra:
            tripRow['num_stops'] = len(this_sequence)
            tripRow['distance'] = this_distance
            tripRow['duration'] = this_duration
            tripRow['start_time'] = couple[0]
            
            tripsCollector.append(tripRow.copy())
            
            # make a df itself?
            # prep up this_sequence to be a stop_times type table
            # nah just iterate, lad
            for seqN, seqRow in this_sequence.iterrows():
                stRow = OrderedDict()
                stRow['trip_id'] = trip_id
                if seqN == 0:
                    stRow['arrival_time'] = stRow['departure_time'] = couple[0]
                elif seqN == (len(this_sequence)-1) :
                    stRow['arrival_time'] = stRow['departure_time'] = couple[1]
                else:
                    stRow['arrival_time'] = stRow['departure_time'] = ''
                
                # stop_id : find by matching name, lat, lon
                stRow['stop_id'] = stops_uniqueDF[(stops_uniqueDF.stop_name==seqRow.stop_name ) & \
                    (stops_uniqueDF.stop_lat  == seqRow.stop_lat ) & \
                    (stops_uniqueDF.stop_lon  == seqRow.stop_lon )].stop_id.tolist()[0]
                
                stRow['stop_sequence'] = seqN + 1
                stRow['timepoint'] = '0'
                stopTimesCollector.append(stRow.copy() )
            
            
        
0 CNT:280_564
direction 0: distance: 60.7 km. duration: 04:02:41
direction 1: distance: 61.1 km. duration: 04:04:14
1 CNT:20P_ME
direction 0: distance: 11.4 km. duration: 00:45:34
direction 1: distance: 11.6 km. duration: 00:46:31
2 CNT:47L_ME
direction 0: distance: 18.0 km. duration: 01:12:02
direction 1: distance: 18.1 km. duration: 01:12:36
3 CNT:107JD
direction 0: distance: 10.5 km. duration: 00:41:50
direction 1: distance: 10.4 km. duration: 00:41:46
4 CNT:47Y
direction 0: distance: 18.5 km. duration: 01:13:53
direction 1: distance: 18.5 km. duration: 01:14:00
5 CNT:16D
direction 0: distance: 16.3 km. duration: 01:05:22
direction 1: distance: 17.1 km. duration: 01:08:17
6 CNT:212_568
direction 0: distance: 50.9 km. duration: 03:23:43
direction 1: distance: 50.8 km. duration: 03:23:00
7 CNT:16X
direction 0: distance: 11.9 km. duration: 00:47:38
Route CNT:16X: No data for direction 1
8 CNT:290U
direction 0: distance: 20.9 km. duration: 01:23:46
direction 1: distance: 21.0 km. duration: 01:24:05
9 CNT:290_463
direction 0: distance: 36.6 km. duration: 02:26:24
direction 1: distance: 36.6 km. duration: 02:26:12
10 CNT:24B
direction 0: distance: 13.0 km. duration: 00:51:53
direction 1: distance: 13.1 km. duration: 00:52:12
11 CNT:107VR
direction 0: distance: 15.3 km. duration: 01:01:05
direction 1: distance: 15.5 km. duration: 01:01:55
12 CNT:10YH
direction 0: distance: 10.7 km. duration: 00:42:55
direction 1: distance: 12.0 km. duration: 00:48:00
13 CNT:5M
direction 0: distance: 11.0 km. duration: 00:43:53
direction 1: distance: 12.4 km. duration: 00:49:24
14 CNT:219
direction 0: distance: 29.3 km. duration: 01:57:17
direction 1: distance: 29.0 km. duration: 01:55:48
15 CNT:24BD
direction 0: distance: 16.7 km. duration: 01:06:38
direction 1: distance: 16.5 km. duration: 01:05:58
16 CNT:21B
direction 0: distance: 8.3 km. duration: 00:33:10
direction 1: distance: 8.1 km. duration: 00:32:17
17 CNT:280_564M
direction 0: distance: 35.5 km. duration: 02:22:02
direction 1: distance: 35.4 km. duration: 02:21:26
18 CNT:16A
direction 0: distance: 12.8 km. duration: 00:51:14
direction 1: distance: 13.1 km. duration: 00:52:22
19 CNT:44EX
direction 0: distance: 6.1 km. duration: 00:24:24
Route CNT:44EX: No data for direction 1
20 CNT:23B
direction 0: distance: 7.3 km. duration: 00:29:10
direction 1: distance: 7.3 km. duration: 00:29:10
21 CNT:279
direction 0: distance: 38.2 km. duration: 02:32:43
direction 1: distance: 38.8 km. duration: 02:35:12
22 CNT:107JS
direction 0: distance: 14.8 km. duration: 00:59:17
direction 1: distance: 14.8 km. duration: 00:59:19
23 CNT:16C
direction 0: distance: 11.7 km. duration: 00:46:41
direction 1: distance: 12.3 km. duration: 00:49:07
24 CNT:568
direction 0: distance: 54.7 km. duration: 03:38:41
direction 1: distance: 54.7 km. duration: 03:38:50
25 CNT:290AF
direction 0: distance: 36.4 km. duration: 02:25:38
direction 1: distance: 36.4 km. duration: 02:25:31
26 CNT:219_ORD_SUB
direction 0: distance: 29.3 km. duration: 01:57:07
direction 1: distance: 29.1 km. duration: 01:56:17
27 CNT:2C
direction 0: distance: 18.9 km. duration: 01:15:36
direction 1: distance: 18.0 km. duration: 01:12:02
28 CNT:47L
direction 0: distance: 18.4 km. duration: 01:13:41
direction 1: distance: 18.6 km. duration: 01:14:19
29 CNT:23GF
direction 0: distance: 8.2 km. duration: 00:32:36
direction 1: distance: 8.2 km. duration: 00:32:43
30 CNT:272G
direction 0: distance: 20.3 km. duration: 01:21:19
direction 1: distance: 19.9 km. duration: 01:19:46
31 CNT:14X
direction 0: distance: 12.0 km. duration: 00:47:53
Route CNT:14X: No data for direction 1
32 CNT:20P
direction 0: distance: 8.5 km. duration: 00:34:10
direction 1: distance: 8.8 km. duration: 00:35:07
33 CNT:8J_A
direction 0: distance: 9.7 km. duration: 00:38:50
direction 1: distance: 9.6 km. duration: 00:38:34
34 CNT:24BJ
direction 0: distance: 14.4 km. duration: 00:57:43
direction 1: distance: 15.2 km. duration: 01:00:55
35 HN2:90L_300
direction 0: distance: 40.6 km. duration: 02:42:14
direction 1: distance: 41.1 km. duration: 02:44:22
36 HN2:156_126
direction 0: distance: 37.9 km. duration: 02:31:43
direction 1: distance: 36.5 km. duration: 02:25:53
37 HN2:299
direction 0: distance: 15.3 km. duration: 01:01:12
direction 1: distance: 15.3 km. duration: 01:01:10
38 HN2:251
direction 0: distance: 25.6 km. duration: 01:42:34
direction 1: distance: 25.6 km. duration: 01:42:34
39 HN2:1D_299
direction 0: distance: 22.0 km. duration: 01:28:07
direction 1: distance: 21.7 km. duration: 01:26:58
40 HN2:204
direction 0: distance: 21.9 km. duration: 01:27:46
direction 1: distance: 21.9 km. duration: 01:27:43
41 HN2:1V
direction 0: distance: 19.9 km. duration: 01:19:46
direction 1: distance: 19.5 km. duration: 01:18:05
42 HN2:225L
direction 0: distance: 40.3 km. duration: 02:41:12
direction 1: distance: 39.9 km. duration: 02:39:46
43 HN2:300_126M
direction 0: distance: 45.8 km. duration: 03:03:10
direction 1: distance: 45.3 km. duration: 03:01:14
44 HN2:2TT_D_N
direction 0: distance: 18.9 km. duration: 01:15:34
direction 1: distance: 18.9 km. duration: 01:15:43
45 HN2:156V
direction 0: distance: 20.5 km. duration: 01:21:58
direction 1: distance: 20.2 km. duration: 01:20:53
46 HN2:299H
direction 0: distance: 21.0 km. duration: 01:23:48
direction 1: distance: 20.2 km. duration: 01:21:00
47 HN2:156_205B
direction 0: distance: 37.4 km. duration: 02:29:41
direction 1: distance: 36.7 km. duration: 02:26:58
48 HN2:158JL
direction 0: distance: 37.4 km. duration: 02:29:26
direction 1: distance: 36.9 km. duration: 02:27:43
49 HN2:205B
direction 0: distance: 30.0 km. duration: 02:00:12
direction 1: distance: 30.0 km. duration: 02:00:05
In [26]:
tripsDF = pd.DataFrame(tripsCollector)
tripsDF.head(10)
Out[26]:
route_id service_id trip_id direction_id num_stops distance duration start_time
0 CNT:280_564 ALL CNT:280_564:d0:1 0 37 60.7 04:02:41 06:00:00
1 CNT:280_564 ALL CNT:280_564:d0:2 0 37 60.7 04:02:41 10:00:00
2 CNT:280_564 ALL CNT:280_564:d0:3 0 37 60.7 04:02:41 14:00:00
3 CNT:280_564 ALL CNT:280_564:d0:4 0 37 60.7 04:02:41 18:00:00
4 CNT:280_564 ALL CNT:280_564:d0:5 0 37 60.7 04:02:41 22:00:00
5 CNT:280_564 ALL CNT:280_564:d1:1 1 37 61.1 04:04:14 06:00:00
6 CNT:280_564 ALL CNT:280_564:d1:2 1 37 61.1 04:04:14 10:00:00
7 CNT:280_564 ALL CNT:280_564:d1:3 1 37 61.1 04:04:14 14:00:00
8 CNT:280_564 ALL CNT:280_564:d1:4 1 37 61.1 04:04:14 18:00:00
9 CNT:280_564 ALL CNT:280_564:d1:5 1 37 61.1 04:04:14 22:00:00
In [27]:
stopTimesDF = pd.DataFrame(stopTimesCollector)
stopTimesDF.head(10)
Out[27]:
trip_id arrival_time departure_time stop_id stop_sequence timepoint
0 CNT:280_564:d0:1 06:00:00 06:00:00 JUBILE1 1 0
1 CNT:280_564:d0:1 SECUND5 2 0
2 CNT:280_564:d0:1 ALUGAD2 3 0
3 CNT:280_564:d0:1 METTUG3 4 0
4 CNT:280_564:d0:1 TARNAK1 5 0
5 CNT:280_564:d0:1 HABSIG1 6 0
6 CNT:280_564:d0:1 NGRI1S1 7 0
7 CNT:280_564:d0:1 SURVEY1 8 0
8 CNT:280_564:d0:1 LITTLE1 9 0
9 CNT:280_564:d0:1 UPPALX1 10 0
In [28]:
t4 = time.time()
logmessage("trips and stop_times process took {} seconds.".format(round(t4-t3,2)))
trips and stop_times process took 148.92 seconds.
In [29]:
'#'*70
Out[29]:
'######################################################################'
In [30]:
# done! saving as agency.txt
agencyDF.to_csv(os.path.join(gtfsFolder+'agency.txt'),index=False)
logmessage('Created agency.txt')
Created agency.txt
In [31]:
# done! saving as calendar.txt
calendarDF.to_csv(os.path.join(gtfsFolder+'calendar.txt'),index=False)
logmessage('Created calendar.txt')
Created calendar.txt
In [32]:
stopsDF.to_csv(os.path.join(gtfsFolder,'stops.txt'),index=False)
logmessage('Created stops.txt, {} entries.'.format(len(stopsDF)))
Created stops.txt, 5387 entries.
In [33]:
routesDF.to_csv(os.path.join(gtfsFolder,'routes.txt'),index=False)
logmessage('Created routes.txt, {} entries.'.format(len(routesDF)))
Created routes.txt, 50 entries.
In [34]:
tripsDF.to_csv(os.path.join(gtfsFolder,'trips.txt'),index=False)
logmessage('Created trips.txt, {} entries.'.format(len(tripsDF)))
Created trips.txt, 485 entries.
In [35]:
stopTimesDF.to_csv(os.path.join(gtfsFolder,'stop_times.txt'),index=False)
logmessage('Created stop_times.txt, {} entries.'.format(len(stopTimesDF)))
Created stop_times.txt, 13980 entries.
In [36]:
t5 = time.time()
logmessage("writing out all GTFS files took {} seconds.".format(round(t5-t4,2)))
logmessage("The whole GTFS creation script took {} seconds.".format(round(t5-t1,2)))
writing out all GTFS files took 0.99 seconds.
The whole GTFS creation script took 158.57 seconds.