R package for downloading OpenStreetMap data
at main 526 lines 21 kB view raw
1/*************************************************************************** 2 * Project: osmdata 3 * File: osmdata-sp.cpp 4 * Language: C++ 5 * 6 * osmdata is free software: you can redistribute it and/or modify it under 7 * the terms of the GNU General Public License as published by the Free 8 * Software Foundation, either version 3 of the License, or (at your option) 9 * any later version. 10 * 11 * osmdata is distributed in the hope that it will be useful, but WITHOUT ANY 12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 14 * details. 15 * 16 * You should have received a copy of the GNU General Public License along with 17 * osm-router. If not, see <http://www.gnu.org/licenses/>. 18 * 19 * Author: Mark Padgham 20 * E-Mail: mark.padgham@email.com 21 * 22 * Description: Extract OSM data from an object of class XmlData and return 23 * it in R 'sp' format. 24 * 25 * Limitations: 26 * 27 * Dependencies: none (rapidXML header included in osmdata) 28 * 29 * Compiler Options: -std=c++11 30 ***************************************************************************/ 31 32#include "osmdata.h" 33 34#include <Rcpp.h> 35 36#include <algorithm> // for min_element/max_element 37 38// Note: This code uses explicit index counters within most loops which use Rcpp 39// objects, because these otherwise require a 40// static_cast <size_t> (std::distance (...)). This operation copies each 41// instance and can slow the loops down by several orders of magnitude! 42 43//' get_osm_nodes 44//' 45//' Store OSM nodes as `sf::POINT` objects 46//' 47//' @param ptxy Pointer to Rcpp::List to hold the resultant geometries 48//' @param kv_mat Pointer to Rcpp::DataFrame to hold key-value pairs 49//' @param nodes Pointer to all nodes in data set 50//' @param unique_vals pointer to all unique values (OSM IDs and keys) in data set 51//' @param bbox Pointer to the bbox needed for `sf` construction 52//' @param crs Pointer to the crs needed for `sf` construction 53//' 54//' @noRd 55void osm_sp::get_osm_nodes (Rcpp::S4 &sp_points, const Nodes &nodes, 56 const UniqueVals &unique_vals) 57{ 58 Rcpp::NumericMatrix ptxy; 59 Rcpp::CharacterMatrix kv_mat; 60 size_t nrow = nodes.size (), ncol = unique_vals.k_point.size (); 61 62 kv_mat = Rcpp::CharacterMatrix (Rcpp::Dimension (nrow, ncol)); 63 std::fill (kv_mat.begin (), kv_mat.end (), NA_STRING); 64 65 ptxy = Rcpp::NumericMatrix (Rcpp::Dimension (nrow, 2)); 66 std::vector <std::string> ptnames; 67 ptnames.reserve (nodes.size ()); 68 unsigned int count = 0; 69 for (auto ni = nodes.begin (); ni != nodes.end (); ++ni) 70 { 71 Rcpp::checkUserInterrupt (); 72 ptxy (count, 0) = ni->second.lon; 73 ptxy (count, 1) = ni->second.lat; 74 ptnames.push_back (std::to_string (ni->first)); 75 for (auto kv_iter = ni->second.key_val.begin (); 76 kv_iter != ni->second.key_val.end (); ++kv_iter) 77 { 78 const std::string &key = kv_iter->first; 79 unsigned int ndi = static_cast <unsigned int> ( 80 std::distance (unique_vals.k_point.begin (), 81 unique_vals.k_point.find (key))); 82 kv_mat (count, ndi) = kv_iter->second; 83 } 84 count++; 85 } 86 std::vector <std::string> colnames = {"lon", "lat"}; 87 Rcpp::List dimnames (0); 88 dimnames.push_back (ptnames); 89 dimnames.push_back (colnames); 90 ptxy.attr ("dimnames") = dimnames; 91 dimnames.erase (0, static_cast <int> (dimnames.size ())); 92 93 Rcpp::DataFrame kv_df = R_NilValue; 94 if (unique_vals.k_point.size () > 0) 95 { 96 kv_mat.attr ("dimnames") = Rcpp::List::create (ptnames, unique_vals.k_point); 97 kv_mat.attr ("names") = unique_vals.k_point; 98 kv_df = osm_convert::restructure_kv_mat (kv_mat, false); 99 } 100 101 Rcpp::Language points_call ("new", "SpatialPoints"); 102 Rcpp::Language sp_points_call ("new", "SpatialPointsDataFrame"); 103 sp_points = sp_points_call.eval (); 104 sp_points.slot ("data") = kv_df; 105 sp_points.slot ("coords") = ptxy; 106} 107 108 109//' get_osm_ways 110//' 111//' Store OSM ways as `sf::LINESTRING` or `sf::POLYGON` objects. 112//' 113//' @param wayList Pointer to Rcpp::List to hold the resultant geometries 114//' @param kv_df Pointer to Rcpp::DataFrame to hold key-value pairs 115//' @param way_ids Vector of <osmid_t> IDs of ways to trace 116//' @param ways Pointer to all ways in data set 117//' @param nodes Pointer to all nodes in data set 118//' @param unique_vals pointer to all unique values (OSM IDs and keys) in data set 119//' @param geom_type Character string specifying "POLYGON" or "LINESTRING" 120//' @param bbox Pointer to the bbox needed for `sf` construction 121//' @param crs Pointer to the crs needed for `sf` construction 122//' 123//' @noRd 124void osm_sp::get_osm_ways (Rcpp::S4 &sp_ways, 125 const std::set <osmid_t> &way_ids, const Ways &ways, const Nodes &nodes, 126 const UniqueVals &unique_vals, const std::string &geom_type) 127{ 128 const int one = static_cast <int> (1); 129 130 if (!(geom_type == "line" || geom_type == "polygon")) 131 throw std::runtime_error ("geom_type must be line or polygon"); 132 133 Rcpp::List wayList (way_ids.size ()); 134 135 size_t nrow = way_ids.size (), ncol = unique_vals.k_way.size (); 136 std::vector <std::string> waynames; 137 waynames.reserve (way_ids.size ()); 138 139 Rcpp::Language line_call ("new", "Line"); 140 Rcpp::Language lines_call ("new", "Lines"); 141 Rcpp::Environment sp_env = Rcpp::Environment::namespace_env ("sp"); 142 Rcpp::Function Polygon = sp_env ["Polygon"]; 143 Rcpp::Language polygons_call ("new", "Polygons"); 144 Rcpp::S4 polygons, line, lines; 145 146 // index of ill-formed polygons later removed - see issue#85 147 std::vector <unsigned int> indx_out; 148 std::vector <bool> poly_okay (nrow); 149 150 Rcpp::CharacterMatrix kv_mat (Rcpp::Dimension (nrow, ncol)); 151 std::fill (kv_mat.begin (), kv_mat.end (), NA_STRING); 152 unsigned int count = 0; 153 for (auto wi = way_ids.begin (); wi != way_ids.end (); ++wi) 154 { 155 Rcpp::checkUserInterrupt (); 156 waynames.push_back (std::to_string (*wi)); 157 Rcpp::NumericMatrix nmat; 158 osm_convert::trace_way_nmat (ways, nodes, (*wi), nmat); 159 Rcpp::List dummy_list (0); 160 poly_okay [count] = true; 161 if (geom_type == "line") 162 { 163 // sp::Line and sp::Lines objects can be constructed directly from 164 // the data with the following two lines, but this is *enormously* 165 // slower: 166 // Rcpp::S4 line = Rcpp::Language ("Line", nmat).eval (); 167 // Rcpp::S4 lines = Rcpp::Language ("Lines", line, id).eval (); 168 // This way of constructing "new" objects and feeding slots is much 169 // faster: 170 line = line_call.eval (); 171 line.slot ("coords") = nmat; 172 dummy_list.push_back (line); 173 lines = lines_call.eval (); 174 lines.slot ("Lines") = dummy_list; 175 lines.slot ("ID") = (*wi); 176 wayList [count] = lines; 177 } else 178 { 179 const double dtol = 1.0e-6; 180 if (nmat.nrow () == 3 && fabs (nmat (0, 0) - nmat (2, 0)) < dtol && 181 (fabs (nmat (0, 1) - nmat (2, 1)) < dtol)) 182 { 183 // polygon has only 3 rows with start == end, so is ill-formed 184 indx_out.push_back (count); 185 poly_okay [count] = false; 186 // temp copy with > 3 rows necessary to suppress sp warning 187 Rcpp::NumericMatrix nmat2 = 188 Rcpp::NumericMatrix (Rcpp::Dimension (4, 2)); 189 nmat = nmat2; 190 } 191 192 Rcpp::S4 poly = Polygon (nmat); 193 poly.slot ("hole") = false; 194 poly.slot ("ringDir") = one; 195 dummy_list.push_back (poly); 196 polygons = polygons_call.eval (); 197 polygons.slot ("Polygons") = dummy_list; 198 polygons.slot ("ID") = (*wi); 199 polygons.slot ("plotOrder") = one; 200 polygons.slot ("labpt") = poly.slot ("labpt"); 201 polygons.slot ("area") = poly.slot ("area"); 202 wayList [count] = polygons; 203 } 204 dummy_list.erase (0); 205 auto wj = ways.find (*wi); 206 osm_convert::get_value_mat_way (wj, unique_vals, kv_mat, count++); 207 } // end for it over poly_ways 208 if (indx_out.size () > 0) 209 { 210 std::reverse (indx_out.begin (), indx_out.end ()); 211 for (auto i: indx_out) 212 { 213 wayList.erase (one); 214 waynames.erase (waynames.begin () + i); 215 } 216 } 217 wayList.attr ("names") = waynames; 218 219 // Reduce kv_mat to indx_in 220 if (indx_out.size () > 0) 221 { 222 size_t n_okay = nrow - indx_out.size (); 223 Rcpp::CharacterMatrix kv_mat2 (Rcpp::Dimension (n_okay, ncol)); 224 std::fill (kv_mat2.begin (), kv_mat2.end (), NA_STRING); 225 int pos = 0, i_int = 0; 226 for (size_t i = 0; i<nrow; i++) 227 { 228 if (poly_okay [i]) // this has to be size_t, but kv_mat is Rcpp so int 229 kv_mat2 (pos++, Rcpp::_) = 230 kv_mat (i_int, Rcpp::_); 231 i_int++; 232 } 233 kv_mat = kv_mat2; 234 } 235 236 Rcpp::DataFrame kv_df = R_NilValue; 237 if (way_ids.size () > 0) 238 { 239 kv_mat.attr ("names") = unique_vals.k_way; 240 kv_mat.attr ("dimnames") = Rcpp::List::create (waynames, unique_vals.k_way); 241 kv_mat.attr ("names") = unique_vals.k_way; 242 if (kv_mat.nrow () > 0 && kv_mat.ncol () > 0) 243 kv_df = osm_convert::restructure_kv_mat (kv_mat, false); 244 // TODO: Can names be assigned to R_NilValue? 245 } 246 247 if (geom_type == "line") 248 { 249 Rcpp::Language sp_lines_call ("new", "SpatialLinesDataFrame"); 250 sp_ways = sp_lines_call.eval (); 251 sp_ways.slot ("lines") = wayList; 252 sp_ways.slot ("data") = kv_df; 253 } else 254 { 255 Rcpp::Language sp_polys_call ("new", "SpatialPolygonsDataFrame"); 256 sp_ways = sp_polys_call.eval (); 257 sp_ways.slot ("polygons") = wayList; 258 // Fill plotOrder slot with int vector - this has to be int, not 259 // unsigned int! 260 std::vector <int> plord; 261 for (int i=0; i<static_cast <int> (nrow); i++) 262 plord.push_back (i + 1); 263 sp_ways.slot ("plotOrder") = plord; 264 plord.clear (); 265 sp_ways.slot ("data") = kv_df; 266 } 267} 268 269 270//' get_osm_relations 271//' 272//' Return a dual Rcpp::List containing all OSM relations, the firmt element of 273//' which holds `multipolygon` relations, while the second holds all others, 274//' which are stored as `multilinestring` objects. 275//' 276//' @param rels Pointer to the vector of Relation objects 277//' @param nodes Pointer to the vector of node objects 278//' @param ways Pointer to the vector of way objects 279//' @param unique_vals Pointer to a UniqueVals object containing std::sets of all 280//' unique IDs and keys for each kind of OSM object (nodes, ways, rels). 281//' 282//' @return A dual Rcpp::List, the first of which contains the multipolygon 283//' relations; the second the multilinestring relations. 284//' 285//' @noRd 286void osm_sp::get_osm_relations (Rcpp::S4 &multilines, Rcpp::S4 &multipolygons, 287 const Relations &rels, const std::map <osmid_t, Node> &nodes, 288 const std::map <osmid_t, OneWay> &ways, const UniqueVals &unique_vals) 289{ 290 /* Trace all multipolygon relations. These are the only OSM types where 291 * sizes are not known before, so lat-lons and node names are stored in 292 * dynamic vectors. These are 3D monsters: #1 for relation, #2 for polygon 293 * in relation, and #3 for data. There are also associated 2D vector<vector> 294 * objects for IDs and multilinestring roles. */ 295 std::set <std::string> keyset; // must be ordered! 296 std::vector <std::string> colnames = {"lat", "lon"}, rownames; 297 Rcpp::List dimnames (0); 298 Rcpp::NumericMatrix nmat (Rcpp::Dimension (0, 0)); 299 300 double_arr2 lat_vec, lon_vec; 301 double_arr3 lat_arr_mp, lon_arr_mp, lon_arr_ls, lat_arr_ls; 302 string_arr2 rowname_vec, id_vec_mp, roles_ls; 303 string_arr3 rowname_arr_mp, rowname_arr_ls; 304 std::vector <osmid_t> ids_ls; 305 std::vector <std::string> ids_mp, rel_id_mp, rel_id_ls; 306 osmt_arr2 id_vec_ls; 307 std::vector <std::string> roles; 308 309 unsigned int nmp = 0, nls = 0; // number of multipolygon and multilinestringrelations 310 for (auto itr = rels.begin (); itr != rels.end (); ++itr) 311 { 312 if (itr->ispoly) 313 nmp++; 314 else 315 { 316 // TODO: Store these as std::vector <std::set <>> to avoid 317 // repetition below 318 std::set <std::string> roles_set; 319 for (auto itw = itr->ways.begin (); itw != itr->ways.end (); ++itw) 320 roles_set.insert (itw->second); 321 nls += roles_set.size (); 322 } 323 } 324 325 size_t ncol = unique_vals.k_rel.size (); 326 rel_id_mp.reserve (nmp); 327 rel_id_ls.reserve (nls); 328 329 Rcpp::CharacterMatrix kv_mat_mp (Rcpp::Dimension (nmp, ncol)), 330 kv_mat_ls (Rcpp::Dimension (nls, ncol)); 331 unsigned int count_mp = 0, count_ls = 0; 332 333 for (auto itr = rels.begin (); itr != rels.end (); ++itr) 334 { 335 Rcpp::checkUserInterrupt (); 336 if (itr->ispoly) // itr->second can only be "outer" or "inner" 337 { 338 trace_multipolygon (itr, ways, nodes, lon_vec, lat_vec, 339 rowname_vec, ids_mp); 340 // Store all ways in that relation and their associated roles 341 rel_id_mp.push_back (std::to_string (itr->id)); 342 lon_arr_mp.push_back (lon_vec); 343 lat_arr_mp.push_back (lat_vec); 344 rowname_arr_mp.push_back (rowname_vec); 345 id_vec_mp.push_back (ids_mp); 346 347 lon_vec.clear (); 348 lon_vec.shrink_to_fit (); 349 lat_vec.clear (); 350 lat_vec.shrink_to_fit (); 351 rowname_vec.clear (); 352 rowname_vec.shrink_to_fit (); 353 ids_mp.clear (); 354 ids_mp.shrink_to_fit (); 355 356 if (nmp > 0) 357 osm_convert::get_value_mat_rel (itr, unique_vals, kv_mat_mp, count_mp++); 358 } else // store as multilinestring 359 { 360 // multistrings are grouped here by roles, unlike GDAL which just 361 // dumps all of them. 362 std::set <std::string> roles_set; 363 for (auto itw = itr->ways.begin (); itw != itr->ways.end (); ++itw) 364 roles_set.insert (itw->second); 365 roles.reserve (roles_set.size ()); 366 for (auto it = roles_set.begin (); it != roles_set.end (); ++it) 367 roles.push_back (*it); 368 roles_set.clear (); 369 for (std::string role: roles) 370 { 371 trace_multilinestring (itr, role, ways, nodes, 372 lon_vec, lat_vec, rowname_vec, ids_ls); 373 std::stringstream ss; 374 ss.str (""); 375 if (role == "") 376 ss << std::to_string (itr->id) << "-(no role)"; 377 else 378 ss << std::to_string (itr->id) << "-" << role; 379 rel_id_ls.push_back (ss.str ()); 380 lon_arr_ls.push_back (lon_vec); 381 lat_arr_ls.push_back (lat_vec); 382 rowname_arr_ls.push_back (rowname_vec); 383 id_vec_ls.push_back (ids_ls); 384 385 lon_vec.clear (); 386 lon_vec.shrink_to_fit (); 387 lat_vec.clear (); 388 lat_vec.shrink_to_fit (); 389 rowname_vec.clear (); 390 rowname_vec.shrink_to_fit (); 391 ids_ls.clear (); 392 ids_ls.shrink_to_fit (); 393 394 if (nls > 0) 395 osm_convert::get_value_mat_rel (itr, unique_vals, kv_mat_ls, count_ls++); 396 } 397 roles_ls.push_back (roles); 398 roles.clear (); 399 } 400 } 401 402 osm_convert::convert_multipoly_to_sp (multipolygons, rels, lon_arr_mp, 403 lat_arr_mp, rowname_arr_mp, id_vec_mp, unique_vals); 404 osm_convert::convert_multiline_to_sp (multilines, rels, lon_arr_ls, 405 lat_arr_ls, rowname_arr_ls, id_vec_ls, unique_vals); 406 407 // ****** clean up ***** 408 lon_arr_mp.clear (); 409 lon_arr_mp.shrink_to_fit (); 410 lon_arr_ls.clear (); 411 lon_arr_ls.shrink_to_fit (); 412 lat_arr_mp.clear (); 413 lat_arr_mp.shrink_to_fit (); 414 lat_arr_ls.clear (); 415 lat_arr_ls.shrink_to_fit (); 416 rowname_arr_mp.clear (); 417 rowname_arr_mp.shrink_to_fit (); 418 rowname_arr_ls.clear (); 419 rowname_arr_ls.shrink_to_fit (); 420 421 rel_id_mp.clear (); 422 rel_id_mp.shrink_to_fit (); 423 rel_id_ls.clear (); 424 rel_id_ls.shrink_to_fit (); 425 roles_ls.clear (); 426 roles_ls.shrink_to_fit (); 427} 428 429 430//' rcpp_osmdata_sp 431//' 432//' Extracts all polygons from an overpass API query 433//' 434//' @param st Text contents of an overpass API query 435//' @return A \code{SpatialLinesDataFrame} contains all polygons and associated data 436//' 437//' @noRd 438// [[Rcpp::export]] 439Rcpp::List rcpp_osmdata_sp (const std::string& st) 440{ 441#ifdef DUMP_INPUT 442 { 443 std::ofstream dump ("./osmdata-sp.xml"); 444 if (dump.is_open()) 445 { 446 dump.write (st.c_str(), st.size()); 447 } 448 } 449#endif 450 451 XmlData xml (st); 452 453 const std::map <osmid_t, Node>& nodes = xml.nodes (); 454 const std::map <osmid_t, OneWay>& ways = xml.ways (); 455 const std::vector <Relation>& rels = xml.relations (); 456 const UniqueVals unique_vals = xml.unique_vals (); 457 458 459 /************************************************************************ 460 ************************************************************************ 461 ** ** 462 ** PRE-PROCESSING ** 463 ** ** 464 ************************************************************************ 465 ************************************************************************/ 466 467 // Step#2 468 std::set <osmid_t> poly_ways, non_poly_ways; 469 for (auto itw = ways.begin (); itw != ways.end (); ++itw) 470 { 471 if ((*itw).second.nodes.front () == (*itw).second.nodes.back ()) 472 { 473 if (poly_ways.find ((*itw).first) == poly_ways.end ()) 474 poly_ways.insert ((*itw).first); 475 } else if (non_poly_ways.find ((*itw).first) == non_poly_ways.end ()) 476 non_poly_ways.insert ((*itw).first); 477 } 478 479 /************************************************************************ 480 ************************************************************************ 481 ** ** 482 ** GET OSM DATA ** 483 ** ** 484 ************************************************************************ 485 ************************************************************************/ 486 487 // The actual routines to extract the OSM data and store in sp objects 488 Rcpp::S4 sp_points, sp_lines, sp_polygons, sp_multilines, sp_multipolygons; 489 osm_sp::get_osm_ways (sp_polygons, poly_ways, ways, nodes, unique_vals, "polygon"); 490 osm_sp::get_osm_ways (sp_lines, non_poly_ways, ways, nodes, unique_vals, "line"); 491 osm_sp::get_osm_nodes (sp_points, nodes, unique_vals); 492 osm_sp::get_osm_relations (sp_multilines, sp_multipolygons, 493 rels, nodes, ways, unique_vals); 494 495 // Add bbox and crs to each sp object 496 Rcpp::NumericMatrix bbox = rcpp_get_bbox (xml.x_min (), xml.x_max (), 497 xml.y_min (), xml.y_max ()); 498 sp_points.slot ("bbox") = bbox; 499 sp_lines.slot ("bbox") = bbox; 500 sp_polygons.slot ("bbox") = bbox; 501 sp_multilines.slot ("bbox") = bbox; 502 sp_multipolygons.slot ("bbox") = bbox; 503 504 Rcpp::Language crs_call ("new", "CRS"); 505 Rcpp::S4 crs = crs_call.eval (); 506 crs.slot ("projargs") = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0"; 507 sp_points.slot ("proj4string") = crs; 508 sp_lines.slot ("proj4string") = crs; 509 sp_polygons.slot ("proj4string") = crs; 510 sp_multilines.slot ("proj4string") = crs; 511 sp_multipolygons.slot ("proj4string") = crs; 512 513 Rcpp::List ret (6); 514 ret [0] = bbox; 515 ret [1] = sp_points; 516 ret [2] = sp_lines; 517 ret [3] = sp_polygons; 518 ret [4] = sp_multilines; 519 ret [5] = sp_multipolygons; 520 521 std::vector <std::string> retnames {"bbox", "points", "lines", "polygons", 522 "multilines", "multipolygons"}; 523 ret.attr ("names") = retnames; 524 525 return ret; 526}