Routing and analysis engine for GTFS (General Transit Feed Specification) data
at main 359 lines 13 kB view raw
1#include "csa.h" 2 3//' rcpp_csa 4//' 5//' Connection Scan Algorithm for GTFS data. The timetable has 6//' [departure_station, arrival_station, departure_time, arrival_time, 7//' trip_id], 8//' with all entries as integer values, including times in seconds after 9//' 00:00:00. The station and trip IDs can be mapped back on to actual station 10//' IDs, but do not necessarily form a single set of unit-interval values 11//' because the timetable is first cut down to only that portion after the 12//' desired start time. These are nevertheless used as direct array indices 13//' throughout, so are all size_t objects rather than int. All indices in the 14//' timetable and transfers DataFrames, as well as start_/end_stations, are 15//' 1-based, but they are still used directly which just means that the first 16//' entries (that is, entry [0]) of station and trip vectors are never used. 17//' 18//' @noRd 19// [[Rcpp::export]] 20Rcpp::DataFrame rcpp_csa (Rcpp::DataFrame timetable, 21 Rcpp::DataFrame transfers, 22 const size_t nstations, 23 const size_t ntrips, 24 const std::vector <size_t> start_stations, 25 const std::vector <size_t> end_stations, 26 const int start_time, 27 const int max_transfers) 28{ 29 30 CSA_Parameters csa_pars; 31 csa::fill_csa_pars (csa_pars, max_transfers, start_time, 32 static_cast <size_t> (timetable.nrow ()), ntrips, nstations); 33 34 std::unordered_set <size_t> start_stations_set, end_stations_set; 35 csa::make_station_sets (start_stations, end_stations, 36 start_stations_set, end_stations_set); 37 38 CSA_Inputs csa_in; 39 csa::make_transfer_map (csa_in.transfer_map, transfers); 40 41 // The csa_out vectors use nstations + 1 because it's 1-indexed throughout, 42 // and the first element is ignored. 43 const size_t n = csa_pars.nstations + 1; 44 CSA_Outputs csa_out (n); 45 46 csa::get_earliest_connection (start_stations, csa_pars.start_time, 47 csa_in.transfer_map, csa_out.earliest_connection); 48 49 csa::csa_in_from_df (timetable, csa_in); 50 51 CSA_Return csa_ret = csa::main_csa_loop (csa_pars, start_stations_set, 52 end_stations_set, csa_in, csa_out); 53 54 size_t route_len = csa::get_route_length (csa_out, csa_pars, 55 csa_ret.end_station); 56 57 std::vector <size_t> end_station_out (route_len), 58 trip_out (route_len, INFINITE_INT); 59 std::vector <int> time_out (route_len); 60 61 csa::extract_final_trip (csa_out, csa_ret, end_station_out, 62 trip_out, time_out); 63 64 Rcpp::DataFrame res = Rcpp::DataFrame::create ( 65 Rcpp::Named ("stop_number") = end_station_out, 66 Rcpp::Named ("time") = time_out, 67 Rcpp::Named ("trip_number") = trip_out, 68 Rcpp::_["stringsAsFactors"] = false); 69 70 return res; 71} 72 73void csa::fill_csa_pars ( 74 CSA_Parameters &csa_pars, 75 int max_transfers, 76 int start_time, 77 size_t timetable_size, 78 size_t ntrips, 79 size_t nstations) 80{ 81 82 csa_pars.max_transfers = max_transfers; 83 csa_pars.start_time = start_time; 84 csa_pars.timetable_size = timetable_size; 85 csa_pars.ntrips = ntrips; 86 csa_pars.nstations = nstations; 87} 88 89// make start and end stations into std::unordered_sets to allow constant-time 90// lookup. 91void csa::make_station_sets ( 92 const std::vector <size_t> &start_stations, 93 const std::vector <size_t> &end_stations, 94 std::unordered_set <size_t> &start_stations_set, 95 std::unordered_set <size_t> &end_stations_set) 96{ 97 98 for (auto i: start_stations) 99 start_stations_set.emplace (i); 100 for (auto i: end_stations) 101 end_stations_set.emplace (i); 102} 103 104void csa::csa_in_from_df ( 105 Rcpp::DataFrame &timetable, 106 CSA_Inputs &csa_in) 107{ 108 109 csa_in.departure_station = Rcpp::as <std::vector <size_t> > ( 110 timetable ["departure_station"]); 111 csa_in.arrival_station = Rcpp::as <std::vector <size_t> > ( 112 timetable ["arrival_station"]); 113 csa_in.trip_id = Rcpp::as <std::vector <size_t> > ( 114 timetable ["trip_id"]); 115 csa_in.departure_time = Rcpp::as <std::vector <int> > ( 116 timetable ["departure_time"]); 117 csa_in.arrival_time = Rcpp::as <std::vector <int> > ( 118 timetable ["arrival_time"]); 119} 120 121// convert transfers into a map from start to (end, transfer_time). 122void csa::make_transfer_map ( 123 TransferMapType &transfer_map, 124 Rcpp::DataFrame &transfers) 125{ 126 127 transfer_map.clear (); 128 std::vector <size_t> trans_from = transfers ["from_stop_id"], 129 trans_to = transfers ["to_stop_id"]; 130 std::vector <int> trans_time = transfers ["min_transfer_time"]; 131 for (size_t i = 0; i < static_cast <size_t> (transfers.nrow ()); i++) 132 if (trans_from [i] != trans_to [i]) 133 { 134 std::unordered_map <size_t, int> transfer_pair; // station, time 135 if (transfer_map.find (trans_from [i]) == 136 transfer_map.end ()) 137 { 138 transfer_pair.clear (); 139 transfer_pair.emplace (trans_to [i], trans_time [i]); 140 transfer_map.emplace (trans_from [i], transfer_pair); 141 } else 142 { 143 transfer_pair = transfer_map.at (trans_from [i]); 144 transfer_pair.emplace (trans_to [i], trans_time [i]); 145 transfer_map [trans_from [i] ] = transfer_pair; 146 } 147 } 148} 149 150void csa::get_earliest_connection ( 151 const std::vector <size_t> &start_stations, 152 const int &start_time, 153 const TransferMapType &transfer_map, 154 std::vector <int> &earliest_connection) 155{ 156 157 for (size_t i = 0; i < start_stations.size (); i++) 158 { 159 earliest_connection [start_stations [i]] = start_time; 160 if (transfer_map.find (start_stations [i]) != 161 transfer_map.end ()) 162 { 163 std::unordered_map <size_t, int> transfer_pair = 164 transfer_map.at (start_stations [i]); 165 // Don't penalise these first footpaths: 166 for (auto t: transfer_pair) 167 earliest_connection [t.first] = start_time; 168 //earliest_connection [t.first] = start_time + t.second; 169 } 170 } 171} 172 173CSA_Return csa::main_csa_loop ( 174 const CSA_Parameters &csa_pars, 175 const std::unordered_set <size_t> &start_stations_set, 176 std::unordered_set <size_t> &end_stations_set, 177 const CSA_Inputs &csa_in, 178 CSA_Outputs &csa_out) 179{ 180 181 CSA_Return csa_ret; 182 csa_ret.earliest_time = INFINITE_INT; 183 csa_ret.end_station = INFINITE_INT; 184 185 std::vector <bool> is_connected (csa_pars.ntrips, false); 186 187 // trip connections: 188 for (size_t i = 0; i < csa_pars.timetable_size; i++) 189 { 190 if (csa_in.departure_time [i] < csa_pars.start_time) 191 continue; // # nocov - these lines already removed in R fn. 192 193 // add all departures from start_stations_set: 194 if (start_stations_set.find (csa_in.departure_station [i]) != 195 start_stations_set.end () && 196 csa_in.arrival_time [i] <= csa_out.earliest_connection [csa_in.arrival_station [i] ]) 197 { 198 is_connected [csa_in.trip_id [i] ] = true; 199 csa::fill_one_csa_out (csa_out, csa_in, 200 csa_in.arrival_station [i], i); 201 } 202 203 // main connection scan: 204 if (((csa_out.earliest_connection [csa_in.departure_station [i] ] <= csa_in.departure_time [i]) && 205 csa_out.n_transfers [csa_in.departure_station [i] ] <= csa_pars.max_transfers) || 206 is_connected [csa_in.trip_id [i]]) 207 { 208 const bool time_earlier = csa_in.arrival_time [i] < csa_out.earliest_connection [csa_in.arrival_station [i] ]; 209 const bool time_equal = csa_in.arrival_time [i] == csa_out.earliest_connection [csa_in.arrival_station [i] ]; 210 const bool less_transfers = csa_out.n_transfers [csa_in.departure_station [i] ] < 211 csa_out.n_transfers [csa_in.arrival_station [i] ]; 212 213 if (time_earlier || (time_equal && less_transfers)) 214 { 215 csa::fill_one_csa_out (csa_out, csa_in, 216 csa_in.arrival_station [i], i); 217 218 csa_out.n_transfers [csa_in.arrival_station [i] ] = 219 csa_out.n_transfers [csa_in.departure_station [i] ]; 220 } 221 csa::check_end_stations (end_stations_set, csa_in.arrival_station [i], 222 csa_in.arrival_time [i], csa_ret); 223 224 if (csa_in.transfer_map.find (csa_in.arrival_station [i]) != csa_in.transfer_map.end ()) 225 { 226 for (auto t: csa_in.transfer_map.at (csa_in.arrival_station [i])) 227 { 228 size_t trans_dest = t.first; 229 int ttime = csa_in.arrival_time [i] + t.second; 230 if (ttime <= csa_out.earliest_connection [trans_dest] && 231 csa_out.n_transfers [trans_dest] <= csa_pars.max_transfers) 232 { 233 // modified version of fill_one_csa_out: 234 csa_out.earliest_connection [trans_dest] = ttime; 235 csa_out.prev_stn [trans_dest] = csa_in.arrival_station [i]; 236 csa_out.prev_time [trans_dest] = csa_in.arrival_time [i]; 237 csa_out.n_transfers [trans_dest]++; 238 239 csa::check_end_stations (end_stations_set, 240 trans_dest, ttime, csa_ret); 241 242 } 243 } 244 } 245 is_connected [csa_in.trip_id [i]] = true; 246 } 247 if (end_stations_set.size () == 0) 248 break; 249 } 250 return csa_ret; 251} 252 253/*! 254 * \param i index into station of csa_out for the connecting service csa_in 255 * \param j index into csa_in 256 */ 257void csa::fill_one_csa_out ( 258 CSA_Outputs &csa_out, 259 const CSA_Inputs &csa_in, 260 const size_t &i, 261 const size_t &j) 262{ 263 264 bool fill_vals = (csa_in.arrival_time [j] < csa_out.earliest_connection [i]); 265 if (!fill_vals) { 266 // service at that time already exists, so only replace if trip_id of 267 // csa_in is same as trip that connected to the departure station. 268 // This clause ensures connection remains on same service in cases of 269 // parallel services; see #48 270 const size_t this_stn = csa_in.departure_station [j]; 271 const size_t prev_trip = csa_out.current_trip [this_stn]; 272 273 fill_vals = (csa_in.trip_id [j] == prev_trip); 274 } 275 276 if (fill_vals) { 277 csa_out.earliest_connection [i] = csa_in.arrival_time [j]; 278 csa_out.current_trip [i] = csa_in.trip_id [j]; 279 csa_out.prev_stn [i] = csa_in.departure_station [j]; 280 csa_out.prev_time [i] = csa_in.departure_time [j]; 281 } 282} 283 284void csa::check_end_stations ( 285 std::unordered_set <size_t> &end_stations_set, 286 const size_t &arrival_station, 287 const int &arrival_time, 288 CSA_Return &csa_ret) 289{ 290 291 if (end_stations_set.find (arrival_station) != end_stations_set.end ()) 292 { 293 if (arrival_time < csa_ret.earliest_time) 294 { 295 csa_ret.earliest_time = arrival_time; 296 csa_ret.end_station = arrival_station; 297 } 298 end_stations_set.erase (arrival_station); 299 } 300} 301 302size_t csa::get_route_length ( 303 const CSA_Outputs &csa_out, 304 const CSA_Parameters &csa_pars, 305 const size_t &end_stn) 306{ 307 308 size_t count = 1; 309 size_t i = end_stn; 310 while (i < INFINITE_INT) 311 { 312 count++; 313 i = csa_out.prev_stn [static_cast <size_t> (i)]; 314 if (count > csa_pars.nstations) 315 Rcpp::stop ("no route found; something went wrong"); // # nocov 316 } 317 318 return count; 319} 320 321void csa::extract_final_trip ( 322 const CSA_Outputs &csa_out, 323 const CSA_Return &csa_ret, 324 std::vector <size_t> &end_station, 325 std::vector <size_t> &trip, 326 std::vector <int> &time) 327{ 328 329 size_t i = csa_ret.end_station; 330 if (i > csa_out.current_trip.size ()) // No route able to be found 331 { 332 end_station.clear (); 333 time.clear (); 334 trip.clear (); 335 } else 336 { 337 time [0] = csa_ret.earliest_time; 338 trip [0] = csa_out.current_trip [i]; 339 end_station [0] = i; 340 size_t count = 1; 341 while (i < INFINITE_INT) 342 { 343 time [count] = csa_out.prev_time [i]; 344 i = csa_out.prev_stn [static_cast <size_t> (i)]; 345 end_station [count] = i; 346 if (i < INFINITE_INT) 347 trip [count] = csa_out.current_trip [i]; 348 count++; 349 } 350 // The last entry of these is all INF, so must be removed. 351 end_station.resize (end_station.size () - 1); 352 time.resize (time.size () - 1); 353 trip.resize (trip.size () - 1); 354 // trip values don't exist for start stations of each route, so 355 for (size_t j = 1; j < trip.size (); j++) 356 if (trip [j] == INFINITE_INT) 357 trip [j] = trip [j - 1]; 358 } 359}