Routing and analysis engine for GTFS (General Transit Feed Specification) data
1#include "csa.h"
2
3//' rcpp_csa
4//'
5//' Connection Scan Algorithm for GTFS data. The timetable has
6//' [departure_station, arrival_station, departure_time, arrival_time,
7//' trip_id],
8//' with all entries as integer values, including times in seconds after
9//' 00:00:00. The station and trip IDs can be mapped back on to actual station
10//' IDs, but do not necessarily form a single set of unit-interval values
11//' because the timetable is first cut down to only that portion after the
12//' desired start time. These are nevertheless used as direct array indices
13//' throughout, so are all size_t objects rather than int. All indices in the
14//' timetable and transfers DataFrames, as well as start_/end_stations, are
15//' 1-based, but they are still used directly which just means that the first
16//' entries (that is, entry [0]) of station and trip vectors are never used.
17//'
18//' @noRd
19// [[Rcpp::export]]
20Rcpp::DataFrame rcpp_csa (Rcpp::DataFrame timetable,
21 Rcpp::DataFrame transfers,
22 const size_t nstations,
23 const size_t ntrips,
24 const std::vector <size_t> start_stations,
25 const std::vector <size_t> end_stations,
26 const int start_time,
27 const int max_transfers)
28{
29
30 CSA_Parameters csa_pars;
31 csa::fill_csa_pars (csa_pars, max_transfers, start_time,
32 static_cast <size_t> (timetable.nrow ()), ntrips, nstations);
33
34 std::unordered_set <size_t> start_stations_set, end_stations_set;
35 csa::make_station_sets (start_stations, end_stations,
36 start_stations_set, end_stations_set);
37
38 CSA_Inputs csa_in;
39 csa::make_transfer_map (csa_in.transfer_map, transfers);
40
41 // The csa_out vectors use nstations + 1 because it's 1-indexed throughout,
42 // and the first element is ignored.
43 const size_t n = csa_pars.nstations + 1;
44 CSA_Outputs csa_out (n);
45
46 csa::get_earliest_connection (start_stations, csa_pars.start_time,
47 csa_in.transfer_map, csa_out.earliest_connection);
48
49 csa::csa_in_from_df (timetable, csa_in);
50
51 CSA_Return csa_ret = csa::main_csa_loop (csa_pars, start_stations_set,
52 end_stations_set, csa_in, csa_out);
53
54 size_t route_len = csa::get_route_length (csa_out, csa_pars,
55 csa_ret.end_station);
56
57 std::vector <size_t> end_station_out (route_len),
58 trip_out (route_len, INFINITE_INT);
59 std::vector <int> time_out (route_len);
60
61 csa::extract_final_trip (csa_out, csa_ret, end_station_out,
62 trip_out, time_out);
63
64 Rcpp::DataFrame res = Rcpp::DataFrame::create (
65 Rcpp::Named ("stop_number") = end_station_out,
66 Rcpp::Named ("time") = time_out,
67 Rcpp::Named ("trip_number") = trip_out,
68 Rcpp::_["stringsAsFactors"] = false);
69
70 return res;
71}
72
73void csa::fill_csa_pars (
74 CSA_Parameters &csa_pars,
75 int max_transfers,
76 int start_time,
77 size_t timetable_size,
78 size_t ntrips,
79 size_t nstations)
80{
81
82 csa_pars.max_transfers = max_transfers;
83 csa_pars.start_time = start_time;
84 csa_pars.timetable_size = timetable_size;
85 csa_pars.ntrips = ntrips;
86 csa_pars.nstations = nstations;
87}
88
89// make start and end stations into std::unordered_sets to allow constant-time
90// lookup.
91void csa::make_station_sets (
92 const std::vector <size_t> &start_stations,
93 const std::vector <size_t> &end_stations,
94 std::unordered_set <size_t> &start_stations_set,
95 std::unordered_set <size_t> &end_stations_set)
96{
97
98 for (auto i: start_stations)
99 start_stations_set.emplace (i);
100 for (auto i: end_stations)
101 end_stations_set.emplace (i);
102}
103
104void csa::csa_in_from_df (
105 Rcpp::DataFrame &timetable,
106 CSA_Inputs &csa_in)
107{
108
109 csa_in.departure_station = Rcpp::as <std::vector <size_t> > (
110 timetable ["departure_station"]);
111 csa_in.arrival_station = Rcpp::as <std::vector <size_t> > (
112 timetable ["arrival_station"]);
113 csa_in.trip_id = Rcpp::as <std::vector <size_t> > (
114 timetable ["trip_id"]);
115 csa_in.departure_time = Rcpp::as <std::vector <int> > (
116 timetable ["departure_time"]);
117 csa_in.arrival_time = Rcpp::as <std::vector <int> > (
118 timetable ["arrival_time"]);
119}
120
121// convert transfers into a map from start to (end, transfer_time).
122void csa::make_transfer_map (
123 TransferMapType &transfer_map,
124 Rcpp::DataFrame &transfers)
125{
126
127 transfer_map.clear ();
128 std::vector <size_t> trans_from = transfers ["from_stop_id"],
129 trans_to = transfers ["to_stop_id"];
130 std::vector <int> trans_time = transfers ["min_transfer_time"];
131 for (size_t i = 0; i < static_cast <size_t> (transfers.nrow ()); i++)
132 if (trans_from [i] != trans_to [i])
133 {
134 std::unordered_map <size_t, int> transfer_pair; // station, time
135 if (transfer_map.find (trans_from [i]) ==
136 transfer_map.end ())
137 {
138 transfer_pair.clear ();
139 transfer_pair.emplace (trans_to [i], trans_time [i]);
140 transfer_map.emplace (trans_from [i], transfer_pair);
141 } else
142 {
143 transfer_pair = transfer_map.at (trans_from [i]);
144 transfer_pair.emplace (trans_to [i], trans_time [i]);
145 transfer_map [trans_from [i] ] = transfer_pair;
146 }
147 }
148}
149
150void csa::get_earliest_connection (
151 const std::vector <size_t> &start_stations,
152 const int &start_time,
153 const TransferMapType &transfer_map,
154 std::vector <int> &earliest_connection)
155{
156
157 for (size_t i = 0; i < start_stations.size (); i++)
158 {
159 earliest_connection [start_stations [i]] = start_time;
160 if (transfer_map.find (start_stations [i]) !=
161 transfer_map.end ())
162 {
163 std::unordered_map <size_t, int> transfer_pair =
164 transfer_map.at (start_stations [i]);
165 // Don't penalise these first footpaths:
166 for (auto t: transfer_pair)
167 earliest_connection [t.first] = start_time;
168 //earliest_connection [t.first] = start_time + t.second;
169 }
170 }
171}
172
173CSA_Return csa::main_csa_loop (
174 const CSA_Parameters &csa_pars,
175 const std::unordered_set <size_t> &start_stations_set,
176 std::unordered_set <size_t> &end_stations_set,
177 const CSA_Inputs &csa_in,
178 CSA_Outputs &csa_out)
179{
180
181 CSA_Return csa_ret;
182 csa_ret.earliest_time = INFINITE_INT;
183 csa_ret.end_station = INFINITE_INT;
184
185 std::vector <bool> is_connected (csa_pars.ntrips, false);
186
187 // trip connections:
188 for (size_t i = 0; i < csa_pars.timetable_size; i++)
189 {
190 if (csa_in.departure_time [i] < csa_pars.start_time)
191 continue; // # nocov - these lines already removed in R fn.
192
193 // add all departures from start_stations_set:
194 if (start_stations_set.find (csa_in.departure_station [i]) !=
195 start_stations_set.end () &&
196 csa_in.arrival_time [i] <= csa_out.earliest_connection [csa_in.arrival_station [i] ])
197 {
198 is_connected [csa_in.trip_id [i] ] = true;
199 csa::fill_one_csa_out (csa_out, csa_in,
200 csa_in.arrival_station [i], i);
201 }
202
203 // main connection scan:
204 if (((csa_out.earliest_connection [csa_in.departure_station [i] ] <= csa_in.departure_time [i]) &&
205 csa_out.n_transfers [csa_in.departure_station [i] ] <= csa_pars.max_transfers) ||
206 is_connected [csa_in.trip_id [i]])
207 {
208 const bool time_earlier = csa_in.arrival_time [i] < csa_out.earliest_connection [csa_in.arrival_station [i] ];
209 const bool time_equal = csa_in.arrival_time [i] == csa_out.earliest_connection [csa_in.arrival_station [i] ];
210 const bool less_transfers = csa_out.n_transfers [csa_in.departure_station [i] ] <
211 csa_out.n_transfers [csa_in.arrival_station [i] ];
212
213 if (time_earlier || (time_equal && less_transfers))
214 {
215 csa::fill_one_csa_out (csa_out, csa_in,
216 csa_in.arrival_station [i], i);
217
218 csa_out.n_transfers [csa_in.arrival_station [i] ] =
219 csa_out.n_transfers [csa_in.departure_station [i] ];
220 }
221 csa::check_end_stations (end_stations_set, csa_in.arrival_station [i],
222 csa_in.arrival_time [i], csa_ret);
223
224 if (csa_in.transfer_map.find (csa_in.arrival_station [i]) != csa_in.transfer_map.end ())
225 {
226 for (auto t: csa_in.transfer_map.at (csa_in.arrival_station [i]))
227 {
228 size_t trans_dest = t.first;
229 int ttime = csa_in.arrival_time [i] + t.second;
230 if (ttime <= csa_out.earliest_connection [trans_dest] &&
231 csa_out.n_transfers [trans_dest] <= csa_pars.max_transfers)
232 {
233 // modified version of fill_one_csa_out:
234 csa_out.earliest_connection [trans_dest] = ttime;
235 csa_out.prev_stn [trans_dest] = csa_in.arrival_station [i];
236 csa_out.prev_time [trans_dest] = csa_in.arrival_time [i];
237 csa_out.n_transfers [trans_dest]++;
238
239 csa::check_end_stations (end_stations_set,
240 trans_dest, ttime, csa_ret);
241
242 }
243 }
244 }
245 is_connected [csa_in.trip_id [i]] = true;
246 }
247 if (end_stations_set.size () == 0)
248 break;
249 }
250 return csa_ret;
251}
252
253/*!
254 * \param i index into station of csa_out for the connecting service csa_in
255 * \param j index into csa_in
256 */
257void csa::fill_one_csa_out (
258 CSA_Outputs &csa_out,
259 const CSA_Inputs &csa_in,
260 const size_t &i,
261 const size_t &j)
262{
263
264 bool fill_vals = (csa_in.arrival_time [j] < csa_out.earliest_connection [i]);
265 if (!fill_vals) {
266 // service at that time already exists, so only replace if trip_id of
267 // csa_in is same as trip that connected to the departure station.
268 // This clause ensures connection remains on same service in cases of
269 // parallel services; see #48
270 const size_t this_stn = csa_in.departure_station [j];
271 const size_t prev_trip = csa_out.current_trip [this_stn];
272
273 fill_vals = (csa_in.trip_id [j] == prev_trip);
274 }
275
276 if (fill_vals) {
277 csa_out.earliest_connection [i] = csa_in.arrival_time [j];
278 csa_out.current_trip [i] = csa_in.trip_id [j];
279 csa_out.prev_stn [i] = csa_in.departure_station [j];
280 csa_out.prev_time [i] = csa_in.departure_time [j];
281 }
282}
283
284void csa::check_end_stations (
285 std::unordered_set <size_t> &end_stations_set,
286 const size_t &arrival_station,
287 const int &arrival_time,
288 CSA_Return &csa_ret)
289{
290
291 if (end_stations_set.find (arrival_station) != end_stations_set.end ())
292 {
293 if (arrival_time < csa_ret.earliest_time)
294 {
295 csa_ret.earliest_time = arrival_time;
296 csa_ret.end_station = arrival_station;
297 }
298 end_stations_set.erase (arrival_station);
299 }
300}
301
302size_t csa::get_route_length (
303 const CSA_Outputs &csa_out,
304 const CSA_Parameters &csa_pars,
305 const size_t &end_stn)
306{
307
308 size_t count = 1;
309 size_t i = end_stn;
310 while (i < INFINITE_INT)
311 {
312 count++;
313 i = csa_out.prev_stn [static_cast <size_t> (i)];
314 if (count > csa_pars.nstations)
315 Rcpp::stop ("no route found; something went wrong"); // # nocov
316 }
317
318 return count;
319}
320
321void csa::extract_final_trip (
322 const CSA_Outputs &csa_out,
323 const CSA_Return &csa_ret,
324 std::vector <size_t> &end_station,
325 std::vector <size_t> &trip,
326 std::vector <int> &time)
327{
328
329 size_t i = csa_ret.end_station;
330 if (i > csa_out.current_trip.size ()) // No route able to be found
331 {
332 end_station.clear ();
333 time.clear ();
334 trip.clear ();
335 } else
336 {
337 time [0] = csa_ret.earliest_time;
338 trip [0] = csa_out.current_trip [i];
339 end_station [0] = i;
340 size_t count = 1;
341 while (i < INFINITE_INT)
342 {
343 time [count] = csa_out.prev_time [i];
344 i = csa_out.prev_stn [static_cast <size_t> (i)];
345 end_station [count] = i;
346 if (i < INFINITE_INT)
347 trip [count] = csa_out.current_trip [i];
348 count++;
349 }
350 // The last entry of these is all INF, so must be removed.
351 end_station.resize (end_station.size () - 1);
352 time.resize (time.size () - 1);
353 trip.resize (trip.size () - 1);
354 // trip values don't exist for start stations of each route, so
355 for (size_t j = 1; j < trip.size (); j++)
356 if (trip [j] == INFINITE_INT)
357 trip [j] = trip [j - 1];
358 }
359}