R package for downloading OpenStreetMap data
1/***************************************************************************
2 * Project: osmdata
3 * File: osmdata-sp.cpp
4 * Language: C++
5 *
6 * osmdata is free software: you can redistribute it and/or modify it under
7 * the terms of the GNU General Public License as published by the Free
8 * Software Foundation, either version 3 of the License, or (at your option)
9 * any later version.
10 *
11 * osmdata is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * osm-router. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Author: Mark Padgham
20 * E-Mail: mark.padgham@email.com
21 *
22 * Description: Extract OSM data from an object of class XmlData and return
23 * it in R 'sp' format.
24 *
25 * Limitations:
26 *
27 * Dependencies: none (rapidXML header included in osmdata)
28 *
29 * Compiler Options: -std=c++11
30 ***************************************************************************/
31
32#include "osmdata.h"
33
34#include <Rcpp.h>
35
36#include <algorithm> // for min_element/max_element
37
38// Note: This code uses explicit index counters within most loops which use Rcpp
39// objects, because these otherwise require a
40// static_cast <size_t> (std::distance (...)). This operation copies each
41// instance and can slow the loops down by several orders of magnitude!
42
43//' get_osm_nodes
44//'
45//' Store OSM nodes as `sf::POINT` objects
46//'
47//' @param ptxy Pointer to Rcpp::List to hold the resultant geometries
48//' @param kv_mat Pointer to Rcpp::DataFrame to hold key-value pairs
49//' @param nodes Pointer to all nodes in data set
50//' @param unique_vals pointer to all unique values (OSM IDs and keys) in data set
51//' @param bbox Pointer to the bbox needed for `sf` construction
52//' @param crs Pointer to the crs needed for `sf` construction
53//'
54//' @noRd
55void osm_sp::get_osm_nodes (Rcpp::S4 &sp_points, const Nodes &nodes,
56 const UniqueVals &unique_vals)
57{
58 Rcpp::NumericMatrix ptxy;
59 Rcpp::CharacterMatrix kv_mat;
60 size_t nrow = nodes.size (), ncol = unique_vals.k_point.size ();
61
62 kv_mat = Rcpp::CharacterMatrix (Rcpp::Dimension (nrow, ncol));
63 std::fill (kv_mat.begin (), kv_mat.end (), NA_STRING);
64
65 ptxy = Rcpp::NumericMatrix (Rcpp::Dimension (nrow, 2));
66 std::vector <std::string> ptnames;
67 ptnames.reserve (nodes.size ());
68 unsigned int count = 0;
69 for (auto ni = nodes.begin (); ni != nodes.end (); ++ni)
70 {
71 Rcpp::checkUserInterrupt ();
72 ptxy (count, 0) = ni->second.lon;
73 ptxy (count, 1) = ni->second.lat;
74 ptnames.push_back (std::to_string (ni->first));
75 for (auto kv_iter = ni->second.key_val.begin ();
76 kv_iter != ni->second.key_val.end (); ++kv_iter)
77 {
78 const std::string &key = kv_iter->first;
79 unsigned int ndi = static_cast <unsigned int> (
80 std::distance (unique_vals.k_point.begin (),
81 unique_vals.k_point.find (key)));
82 kv_mat (count, ndi) = kv_iter->second;
83 }
84 count++;
85 }
86 std::vector <std::string> colnames = {"lon", "lat"};
87 Rcpp::List dimnames (0);
88 dimnames.push_back (ptnames);
89 dimnames.push_back (colnames);
90 ptxy.attr ("dimnames") = dimnames;
91 dimnames.erase (0, static_cast <int> (dimnames.size ()));
92
93 Rcpp::DataFrame kv_df = R_NilValue;
94 if (unique_vals.k_point.size () > 0)
95 {
96 kv_mat.attr ("dimnames") = Rcpp::List::create (ptnames, unique_vals.k_point);
97 kv_mat.attr ("names") = unique_vals.k_point;
98 kv_df = osm_convert::restructure_kv_mat (kv_mat, false);
99 }
100
101 Rcpp::Language points_call ("new", "SpatialPoints");
102 Rcpp::Language sp_points_call ("new", "SpatialPointsDataFrame");
103 sp_points = sp_points_call.eval ();
104 sp_points.slot ("data") = kv_df;
105 sp_points.slot ("coords") = ptxy;
106}
107
108
109//' get_osm_ways
110//'
111//' Store OSM ways as `sf::LINESTRING` or `sf::POLYGON` objects.
112//'
113//' @param wayList Pointer to Rcpp::List to hold the resultant geometries
114//' @param kv_df Pointer to Rcpp::DataFrame to hold key-value pairs
115//' @param way_ids Vector of <osmid_t> IDs of ways to trace
116//' @param ways Pointer to all ways in data set
117//' @param nodes Pointer to all nodes in data set
118//' @param unique_vals pointer to all unique values (OSM IDs and keys) in data set
119//' @param geom_type Character string specifying "POLYGON" or "LINESTRING"
120//' @param bbox Pointer to the bbox needed for `sf` construction
121//' @param crs Pointer to the crs needed for `sf` construction
122//'
123//' @noRd
124void osm_sp::get_osm_ways (Rcpp::S4 &sp_ways,
125 const std::set <osmid_t> &way_ids, const Ways &ways, const Nodes &nodes,
126 const UniqueVals &unique_vals, const std::string &geom_type)
127{
128 const int one = static_cast <int> (1);
129
130 if (!(geom_type == "line" || geom_type == "polygon"))
131 throw std::runtime_error ("geom_type must be line or polygon");
132
133 Rcpp::List wayList (way_ids.size ());
134
135 size_t nrow = way_ids.size (), ncol = unique_vals.k_way.size ();
136 std::vector <std::string> waynames;
137 waynames.reserve (way_ids.size ());
138
139 Rcpp::Language line_call ("new", "Line");
140 Rcpp::Language lines_call ("new", "Lines");
141 Rcpp::Environment sp_env = Rcpp::Environment::namespace_env ("sp");
142 Rcpp::Function Polygon = sp_env ["Polygon"];
143 Rcpp::Language polygons_call ("new", "Polygons");
144 Rcpp::S4 polygons, line, lines;
145
146 // index of ill-formed polygons later removed - see issue#85
147 std::vector <unsigned int> indx_out;
148 std::vector <bool> poly_okay (nrow);
149
150 Rcpp::CharacterMatrix kv_mat (Rcpp::Dimension (nrow, ncol));
151 std::fill (kv_mat.begin (), kv_mat.end (), NA_STRING);
152 unsigned int count = 0;
153 for (auto wi = way_ids.begin (); wi != way_ids.end (); ++wi)
154 {
155 Rcpp::checkUserInterrupt ();
156 waynames.push_back (std::to_string (*wi));
157 Rcpp::NumericMatrix nmat;
158 osm_convert::trace_way_nmat (ways, nodes, (*wi), nmat);
159 Rcpp::List dummy_list (0);
160 poly_okay [count] = true;
161 if (geom_type == "line")
162 {
163 // sp::Line and sp::Lines objects can be constructed directly from
164 // the data with the following two lines, but this is *enormously*
165 // slower:
166 // Rcpp::S4 line = Rcpp::Language ("Line", nmat).eval ();
167 // Rcpp::S4 lines = Rcpp::Language ("Lines", line, id).eval ();
168 // This way of constructing "new" objects and feeding slots is much
169 // faster:
170 line = line_call.eval ();
171 line.slot ("coords") = nmat;
172 dummy_list.push_back (line);
173 lines = lines_call.eval ();
174 lines.slot ("Lines") = dummy_list;
175 lines.slot ("ID") = (*wi);
176 wayList [count] = lines;
177 } else
178 {
179 const double dtol = 1.0e-6;
180 if (nmat.nrow () == 3 && fabs (nmat (0, 0) - nmat (2, 0)) < dtol &&
181 (fabs (nmat (0, 1) - nmat (2, 1)) < dtol))
182 {
183 // polygon has only 3 rows with start == end, so is ill-formed
184 indx_out.push_back (count);
185 poly_okay [count] = false;
186 // temp copy with > 3 rows necessary to suppress sp warning
187 Rcpp::NumericMatrix nmat2 =
188 Rcpp::NumericMatrix (Rcpp::Dimension (4, 2));
189 nmat = nmat2;
190 }
191
192 Rcpp::S4 poly = Polygon (nmat);
193 poly.slot ("hole") = false;
194 poly.slot ("ringDir") = one;
195 dummy_list.push_back (poly);
196 polygons = polygons_call.eval ();
197 polygons.slot ("Polygons") = dummy_list;
198 polygons.slot ("ID") = (*wi);
199 polygons.slot ("plotOrder") = one;
200 polygons.slot ("labpt") = poly.slot ("labpt");
201 polygons.slot ("area") = poly.slot ("area");
202 wayList [count] = polygons;
203 }
204 dummy_list.erase (0);
205 auto wj = ways.find (*wi);
206 osm_convert::get_value_mat_way (wj, unique_vals, kv_mat, count++);
207 } // end for it over poly_ways
208 if (indx_out.size () > 0)
209 {
210 std::reverse (indx_out.begin (), indx_out.end ());
211 for (auto i: indx_out)
212 {
213 wayList.erase (one);
214 waynames.erase (waynames.begin () + i);
215 }
216 }
217 wayList.attr ("names") = waynames;
218
219 // Reduce kv_mat to indx_in
220 if (indx_out.size () > 0)
221 {
222 size_t n_okay = nrow - indx_out.size ();
223 Rcpp::CharacterMatrix kv_mat2 (Rcpp::Dimension (n_okay, ncol));
224 std::fill (kv_mat2.begin (), kv_mat2.end (), NA_STRING);
225 int pos = 0, i_int = 0;
226 for (size_t i = 0; i<nrow; i++)
227 {
228 if (poly_okay [i]) // this has to be size_t, but kv_mat is Rcpp so int
229 kv_mat2 (pos++, Rcpp::_) =
230 kv_mat (i_int, Rcpp::_);
231 i_int++;
232 }
233 kv_mat = kv_mat2;
234 }
235
236 Rcpp::DataFrame kv_df = R_NilValue;
237 if (way_ids.size () > 0)
238 {
239 kv_mat.attr ("names") = unique_vals.k_way;
240 kv_mat.attr ("dimnames") = Rcpp::List::create (waynames, unique_vals.k_way);
241 kv_mat.attr ("names") = unique_vals.k_way;
242 if (kv_mat.nrow () > 0 && kv_mat.ncol () > 0)
243 kv_df = osm_convert::restructure_kv_mat (kv_mat, false);
244 // TODO: Can names be assigned to R_NilValue?
245 }
246
247 if (geom_type == "line")
248 {
249 Rcpp::Language sp_lines_call ("new", "SpatialLinesDataFrame");
250 sp_ways = sp_lines_call.eval ();
251 sp_ways.slot ("lines") = wayList;
252 sp_ways.slot ("data") = kv_df;
253 } else
254 {
255 Rcpp::Language sp_polys_call ("new", "SpatialPolygonsDataFrame");
256 sp_ways = sp_polys_call.eval ();
257 sp_ways.slot ("polygons") = wayList;
258 // Fill plotOrder slot with int vector - this has to be int, not
259 // unsigned int!
260 std::vector <int> plord;
261 for (int i=0; i<static_cast <int> (nrow); i++)
262 plord.push_back (i + 1);
263 sp_ways.slot ("plotOrder") = plord;
264 plord.clear ();
265 sp_ways.slot ("data") = kv_df;
266 }
267}
268
269
270//' get_osm_relations
271//'
272//' Return a dual Rcpp::List containing all OSM relations, the firmt element of
273//' which holds `multipolygon` relations, while the second holds all others,
274//' which are stored as `multilinestring` objects.
275//'
276//' @param rels Pointer to the vector of Relation objects
277//' @param nodes Pointer to the vector of node objects
278//' @param ways Pointer to the vector of way objects
279//' @param unique_vals Pointer to a UniqueVals object containing std::sets of all
280//' unique IDs and keys for each kind of OSM object (nodes, ways, rels).
281//'
282//' @return A dual Rcpp::List, the first of which contains the multipolygon
283//' relations; the second the multilinestring relations.
284//'
285//' @noRd
286void osm_sp::get_osm_relations (Rcpp::S4 &multilines, Rcpp::S4 &multipolygons,
287 const Relations &rels, const std::map <osmid_t, Node> &nodes,
288 const std::map <osmid_t, OneWay> &ways, const UniqueVals &unique_vals)
289{
290 /* Trace all multipolygon relations. These are the only OSM types where
291 * sizes are not known before, so lat-lons and node names are stored in
292 * dynamic vectors. These are 3D monsters: #1 for relation, #2 for polygon
293 * in relation, and #3 for data. There are also associated 2D vector<vector>
294 * objects for IDs and multilinestring roles. */
295 std::set <std::string> keyset; // must be ordered!
296 std::vector <std::string> colnames = {"lat", "lon"}, rownames;
297 Rcpp::List dimnames (0);
298 Rcpp::NumericMatrix nmat (Rcpp::Dimension (0, 0));
299
300 double_arr2 lat_vec, lon_vec;
301 double_arr3 lat_arr_mp, lon_arr_mp, lon_arr_ls, lat_arr_ls;
302 string_arr2 rowname_vec, id_vec_mp, roles_ls;
303 string_arr3 rowname_arr_mp, rowname_arr_ls;
304 std::vector <osmid_t> ids_ls;
305 std::vector <std::string> ids_mp, rel_id_mp, rel_id_ls;
306 osmt_arr2 id_vec_ls;
307 std::vector <std::string> roles;
308
309 unsigned int nmp = 0, nls = 0; // number of multipolygon and multilinestringrelations
310 for (auto itr = rels.begin (); itr != rels.end (); ++itr)
311 {
312 if (itr->ispoly)
313 nmp++;
314 else
315 {
316 // TODO: Store these as std::vector <std::set <>> to avoid
317 // repetition below
318 std::set <std::string> roles_set;
319 for (auto itw = itr->ways.begin (); itw != itr->ways.end (); ++itw)
320 roles_set.insert (itw->second);
321 nls += roles_set.size ();
322 }
323 }
324
325 size_t ncol = unique_vals.k_rel.size ();
326 rel_id_mp.reserve (nmp);
327 rel_id_ls.reserve (nls);
328
329 Rcpp::CharacterMatrix kv_mat_mp (Rcpp::Dimension (nmp, ncol)),
330 kv_mat_ls (Rcpp::Dimension (nls, ncol));
331 unsigned int count_mp = 0, count_ls = 0;
332
333 for (auto itr = rels.begin (); itr != rels.end (); ++itr)
334 {
335 Rcpp::checkUserInterrupt ();
336 if (itr->ispoly) // itr->second can only be "outer" or "inner"
337 {
338 trace_multipolygon (itr, ways, nodes, lon_vec, lat_vec,
339 rowname_vec, ids_mp);
340 // Store all ways in that relation and their associated roles
341 rel_id_mp.push_back (std::to_string (itr->id));
342 lon_arr_mp.push_back (lon_vec);
343 lat_arr_mp.push_back (lat_vec);
344 rowname_arr_mp.push_back (rowname_vec);
345 id_vec_mp.push_back (ids_mp);
346
347 lon_vec.clear ();
348 lon_vec.shrink_to_fit ();
349 lat_vec.clear ();
350 lat_vec.shrink_to_fit ();
351 rowname_vec.clear ();
352 rowname_vec.shrink_to_fit ();
353 ids_mp.clear ();
354 ids_mp.shrink_to_fit ();
355
356 if (nmp > 0)
357 osm_convert::get_value_mat_rel (itr, unique_vals, kv_mat_mp, count_mp++);
358 } else // store as multilinestring
359 {
360 // multistrings are grouped here by roles, unlike GDAL which just
361 // dumps all of them.
362 std::set <std::string> roles_set;
363 for (auto itw = itr->ways.begin (); itw != itr->ways.end (); ++itw)
364 roles_set.insert (itw->second);
365 roles.reserve (roles_set.size ());
366 for (auto it = roles_set.begin (); it != roles_set.end (); ++it)
367 roles.push_back (*it);
368 roles_set.clear ();
369 for (std::string role: roles)
370 {
371 trace_multilinestring (itr, role, ways, nodes,
372 lon_vec, lat_vec, rowname_vec, ids_ls);
373 std::stringstream ss;
374 ss.str ("");
375 if (role == "")
376 ss << std::to_string (itr->id) << "-(no role)";
377 else
378 ss << std::to_string (itr->id) << "-" << role;
379 rel_id_ls.push_back (ss.str ());
380 lon_arr_ls.push_back (lon_vec);
381 lat_arr_ls.push_back (lat_vec);
382 rowname_arr_ls.push_back (rowname_vec);
383 id_vec_ls.push_back (ids_ls);
384
385 lon_vec.clear ();
386 lon_vec.shrink_to_fit ();
387 lat_vec.clear ();
388 lat_vec.shrink_to_fit ();
389 rowname_vec.clear ();
390 rowname_vec.shrink_to_fit ();
391 ids_ls.clear ();
392 ids_ls.shrink_to_fit ();
393
394 if (nls > 0)
395 osm_convert::get_value_mat_rel (itr, unique_vals, kv_mat_ls, count_ls++);
396 }
397 roles_ls.push_back (roles);
398 roles.clear ();
399 }
400 }
401
402 osm_convert::convert_multipoly_to_sp (multipolygons, rels, lon_arr_mp,
403 lat_arr_mp, rowname_arr_mp, id_vec_mp, unique_vals);
404 osm_convert::convert_multiline_to_sp (multilines, rels, lon_arr_ls,
405 lat_arr_ls, rowname_arr_ls, id_vec_ls, unique_vals);
406
407 // ****** clean up *****
408 lon_arr_mp.clear ();
409 lon_arr_mp.shrink_to_fit ();
410 lon_arr_ls.clear ();
411 lon_arr_ls.shrink_to_fit ();
412 lat_arr_mp.clear ();
413 lat_arr_mp.shrink_to_fit ();
414 lat_arr_ls.clear ();
415 lat_arr_ls.shrink_to_fit ();
416 rowname_arr_mp.clear ();
417 rowname_arr_mp.shrink_to_fit ();
418 rowname_arr_ls.clear ();
419 rowname_arr_ls.shrink_to_fit ();
420
421 rel_id_mp.clear ();
422 rel_id_mp.shrink_to_fit ();
423 rel_id_ls.clear ();
424 rel_id_ls.shrink_to_fit ();
425 roles_ls.clear ();
426 roles_ls.shrink_to_fit ();
427}
428
429
430//' rcpp_osmdata_sp
431//'
432//' Extracts all polygons from an overpass API query
433//'
434//' @param st Text contents of an overpass API query
435//' @return A \code{SpatialLinesDataFrame} contains all polygons and associated data
436//'
437//' @noRd
438// [[Rcpp::export]]
439Rcpp::List rcpp_osmdata_sp (const std::string& st)
440{
441#ifdef DUMP_INPUT
442 {
443 std::ofstream dump ("./osmdata-sp.xml");
444 if (dump.is_open())
445 {
446 dump.write (st.c_str(), st.size());
447 }
448 }
449#endif
450
451 XmlData xml (st);
452
453 const std::map <osmid_t, Node>& nodes = xml.nodes ();
454 const std::map <osmid_t, OneWay>& ways = xml.ways ();
455 const std::vector <Relation>& rels = xml.relations ();
456 const UniqueVals unique_vals = xml.unique_vals ();
457
458
459 /************************************************************************
460 ************************************************************************
461 ** **
462 ** PRE-PROCESSING **
463 ** **
464 ************************************************************************
465 ************************************************************************/
466
467 // Step#2
468 std::set <osmid_t> poly_ways, non_poly_ways;
469 for (auto itw = ways.begin (); itw != ways.end (); ++itw)
470 {
471 if ((*itw).second.nodes.front () == (*itw).second.nodes.back ())
472 {
473 if (poly_ways.find ((*itw).first) == poly_ways.end ())
474 poly_ways.insert ((*itw).first);
475 } else if (non_poly_ways.find ((*itw).first) == non_poly_ways.end ())
476 non_poly_ways.insert ((*itw).first);
477 }
478
479 /************************************************************************
480 ************************************************************************
481 ** **
482 ** GET OSM DATA **
483 ** **
484 ************************************************************************
485 ************************************************************************/
486
487 // The actual routines to extract the OSM data and store in sp objects
488 Rcpp::S4 sp_points, sp_lines, sp_polygons, sp_multilines, sp_multipolygons;
489 osm_sp::get_osm_ways (sp_polygons, poly_ways, ways, nodes, unique_vals, "polygon");
490 osm_sp::get_osm_ways (sp_lines, non_poly_ways, ways, nodes, unique_vals, "line");
491 osm_sp::get_osm_nodes (sp_points, nodes, unique_vals);
492 osm_sp::get_osm_relations (sp_multilines, sp_multipolygons,
493 rels, nodes, ways, unique_vals);
494
495 // Add bbox and crs to each sp object
496 Rcpp::NumericMatrix bbox = rcpp_get_bbox (xml.x_min (), xml.x_max (),
497 xml.y_min (), xml.y_max ());
498 sp_points.slot ("bbox") = bbox;
499 sp_lines.slot ("bbox") = bbox;
500 sp_polygons.slot ("bbox") = bbox;
501 sp_multilines.slot ("bbox") = bbox;
502 sp_multipolygons.slot ("bbox") = bbox;
503
504 Rcpp::Language crs_call ("new", "CRS");
505 Rcpp::S4 crs = crs_call.eval ();
506 crs.slot ("projargs") = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0";
507 sp_points.slot ("proj4string") = crs;
508 sp_lines.slot ("proj4string") = crs;
509 sp_polygons.slot ("proj4string") = crs;
510 sp_multilines.slot ("proj4string") = crs;
511 sp_multipolygons.slot ("proj4string") = crs;
512
513 Rcpp::List ret (6);
514 ret [0] = bbox;
515 ret [1] = sp_points;
516 ret [2] = sp_lines;
517 ret [3] = sp_polygons;
518 ret [4] = sp_multilines;
519 ret [5] = sp_multipolygons;
520
521 std::vector <std::string> retnames {"bbox", "points", "lines", "polygons",
522 "multilines", "multipolygons"};
523 ret.attr ("names") = retnames;
524
525 return ret;
526}