R package for downloading OpenStreetMap data
1/***************************************************************************
2 * Project: osmdata
3 * File: osmdatap-sc.h
4 * Language: C++
5 *
6 * osmdata is free software: you can redistribute it and/or modify it under
7 * the terms of the GNU General Public License as published by the Free
8 * Software Foundation, either version 3 of the License, or (at your option)
9 * any later version.
10 *
11 * osmdata is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License along with
 * osmdata. If not, see <https://www.gnu.org/licenses/>.
18 *
19 * Author: Mark Padgham
20 * E-Mail: mark.padgham@email.com
21 *
22 * Description: Silicate (SC) parsing of OSM XML file
23 *
24 * Limitations:
25 *
26 * Dependencies: none (rapidXML header included in osmdata)
27 *
28 * Compiler Options: -std=c++11
29 ***************************************************************************/
30
31#pragma once
32
33#include <Rcpp.h>
34
35#include "common.h"
36#include "get-bbox.h"
37#include "trace-osm.h"
38#include "convert-osm-rcpp.h"
39
40std::string random_id (size_t len);
41
42/************************************************************************
43 ************************************************************************
44 ** **
45 ** CLASS::XMLDATASC **
46 ** **
47 ************************************************************************
48 ************************************************************************/
49
50
51class XmlDataSC
52{
53 /* Two main options to efficiently store-on-reading are:
54 * 1. Use std::maps for everything, but this would ultimately require
55 * copying all entries over to an appropriate Rcpp::Matrix class; or
56 * 2. Setting up individual vectors for each (id, key, val), and just
57 * Rcpp::wrap-ing them for return.
58 * The second is more efficient, and so is implemented here, via an initial
59 * read to determine the sizes of the vectors (in Counters), then a second
60 * read to store them.
61 */
62
63 public:
64
65 struct Counters {
66 // Initial function getSizes does an initial scan of the XML doc and
67 // establishes the sizes of everything with these counters
68 size_t nnodes, nnode_kv,
69 nways, nway_kv, nedges,
70 nrels, nrel_kv, nrel_memb;
71 std::string id;
72 };
73
74 struct Vectors {
75 // Vectors used to store the data, with sizes allocated according to
76 // the values of Counters
77 //
78 // vectors for key-val pairs in object table:
79 std::vector <std::string>
80 rel_kv_id, rel_key, rel_val,
81 rel_memb_id, rel_memb_type, rel_ref, rel_role,
82 way_id, way_key, way_val,
83 node_id, node_key, node_val;
84
85 // vectors for edge and object_link_edge tables:
86 std::vector <std::string> vx0, vx1, edge, object;
87 // vectors for vertex table
88 std::vector <double> vx, vy;
89 std::vector <std::string> vert_id;
90 };
91
92 struct Maps {
93 std::unordered_map <std::string, std::vector <std::string> >
94 rel_membs, way_membs;
95 };
96
97 private:
98
99 Counters counters;
100 Vectors vectors;
101 Maps maps;
102
103 // Number of nodes in each way, and ways in each rel
104 std::unordered_map <std::string, size_t> waySizes, relSizes;
105
106 public:
107
108 XmlDataSC (const std::string& str)
109 {
110 // APS empty m_nodes/m_ways/m_relations constructed here, no need to explicitly clear
111 XmlDocPtr p = parseXML (str);
112
113 zeroCounters ();
114 getSizes (p->first_node ());
115 vectorsResize ();
116
117 zeroCounters ();
118 traverseWays (p->first_node ());
119 }
120
121 // APS make the dtor virtual since compiler support for "final" is limited
122 virtual ~XmlDataSC ()
123 {
124 }
125
126 const std::vector <std::string>& get_rel_kv_id() const { return vectors.rel_kv_id; }
127 const std::vector <std::string>& get_rel_key() const { return vectors.rel_key; }
128 const std::vector <std::string>& get_rel_val() const { return vectors.rel_val; }
129
130 const std::vector <std::string>& get_rel_memb_id() const { return vectors.rel_memb_id; }
131 const std::vector <std::string>& get_rel_memb_type() const { return vectors.rel_memb_type; }
132 const std::vector <std::string>& get_rel_ref() const { return vectors.rel_ref; }
133 const std::vector <std::string>& get_rel_role() const { return vectors.rel_role; }
134
135 const std::vector <std::string>& get_way_id() const { return vectors.way_id; }
136 const std::vector <std::string>& get_way_key() const { return vectors.way_key; }
137 const std::vector <std::string>& get_way_val() const { return vectors.way_val; }
138
139 const std::vector <std::string>& get_node_id() const { return vectors.node_id; }
140 const std::vector <std::string>& get_node_key() const { return vectors.node_key; }
141 const std::vector <std::string>& get_node_val() const { return vectors.node_val; }
142
143 // vectors for edge and object_link_edge tables:
144 const std::vector <std::string>& get_vx0 () const { return vectors.vx0; }
145 const std::vector <std::string>& get_vx1 () const { return vectors.vx1; }
146 const std::vector <std::string>& get_edge () const { return vectors.edge; }
147 const std::vector <std::string>& get_object () const { return vectors.object; }
148
149 // vectors for vertex table
150 const std::vector <std::string>& get_vert_id () const { return vectors.vert_id; }
151 const std::vector <double>& get_vx () const { return vectors.vx; }
152 const std::vector <double>& get_vy () const { return vectors.vy; }
153
154 const std::unordered_map <std::string, std::vector <std::string> >&
155 get_rel_membs () const { return maps.rel_membs; }
156 const std::unordered_map <std::string, std::vector <std::string> >&
157 get_way_membs () const { return maps.way_membs; }
158
159 private:
160
161 void zeroCounters ();
162 void getSizes (XmlNodePtr pt);
163 void vectorsResize ();
164
165 void countRelation (XmlNodePtr pt);
166 void countWay (XmlNodePtr pt);
167 void countNode (XmlNodePtr pt);
168
169 void traverseWays (XmlNodePtr pt); // The primary function
170
171 void traverseRelation (XmlNodePtr pt, size_t &memb_num);
172 void traverseWay (XmlNodePtr pt, size_t& node_num);
173 void traverseNode (XmlNodePtr pt);
174
175}; // end Class::XmlDataSC
176
177inline void XmlDataSC::zeroCounters ()
178{
179 counters.nnodes = 0;
180 counters.nnode_kv = 0;
181
182 counters.nways = 0;
183 counters.nway_kv = 0;
184 counters.nedges = 0;
185
186 counters.nrels = 0;
187 counters.nrel_kv = 0;
188 counters.nrel_memb = 0;
189}
190
191inline void XmlDataSC::vectorsResize ()
192{
193 vectors.rel_kv_id.resize (counters.nrel_kv);
194 vectors.rel_key.resize (counters.nrel_kv);
195 vectors.rel_val.resize (counters.nrel_kv);
196
197 vectors.rel_memb_id.resize (counters.nrel_memb);
198 vectors.rel_memb_type.resize (counters.nrel_memb);
199 vectors.rel_ref.resize (counters.nrel_memb);
200 vectors.rel_role.resize (counters.nrel_memb);
201
202 vectors.way_id.resize (counters.nway_kv);
203 vectors.way_key.resize (counters.nway_kv);
204 vectors.way_val.resize (counters.nway_kv);
205
206 vectors.node_id.resize (counters.nnode_kv);
207 vectors.node_key.resize (counters.nnode_kv);
208 vectors.node_val.resize (counters.nnode_kv);
209
210 vectors.vx0.resize (counters.nedges);
211 vectors.vx1.resize (counters.nedges);
212 vectors.edge.resize (counters.nedges);
213 vectors.object.resize (counters.nedges);
214
215 vectors.vx.resize (counters.nnodes);
216 vectors.vy.resize (counters.nnodes);
217 vectors.vert_id.resize (counters.nnodes);
218
219 for (auto m: relSizes)
220 {
221 maps.rel_membs.emplace (m.first, std::vector <std::string> (m.second));
222 }
223 for (auto m: waySizes)
224 {
225 maps.way_membs.emplace (m.first, std::vector <std::string> (m.second));
226 }
227}
228
229/************************************************************************
230 ************************************************************************
231 ** **
232 ** FUNCTION::GETSIZES **
233 ** **
234 ************************************************************************
235 ************************************************************************/
236
237inline void XmlDataSC::getSizes (XmlNodePtr pt)
238{
239 for (XmlNodePtr it = pt->first_node (); it != nullptr;
240 it = it->next_sibling())
241 {
242 if (!strcmp (it->name(), "node"))
243 {
244 countNode (it); // increments nnode_kv
245 counters.nnodes++;
246 }
247 else if (!strcmp (it->name(), "way"))
248 {
249 size_t wayLength = counters.nedges;
250 countWay (it); // increments nway_kv, nedges
251 wayLength = counters.nedges - wayLength;
252 counters.nedges--; // counts nodes, so each way has nedges = 1 - nnodes
253 waySizes.emplace (counters.id, wayLength);
254 counters.nways++;
255 }
256 else if (!strcmp (it->name(), "relation"))
257 {
258 size_t relLength = counters.nrel_memb;
259 countRelation (it); // increments nrel_kv, nrel_memb
260 relLength = counters.nrel_memb - relLength;
261 relSizes.emplace (counters.id, relLength);
262 counters.nrels++;
263 }
264 else
265 {
266 getSizes (it);
267 }
268 }
269} // end function XmlDataSC::getSizes
270
271
272/************************************************************************
273 ************************************************************************
274 ** **
275 ** FUNCTION::COUNTRELATION **
276 ** **
277 ************************************************************************
278 ************************************************************************/
279
280inline void XmlDataSC::countRelation (XmlNodePtr pt)
281{
282 // Relations can have either members or key-val pairs, counted here with
283 // separate counters
284 for (XmlAttrPtr it = pt->first_attribute (); it != nullptr;
285 it = it->next_attribute())
286 {
287 if (!strcmp (it->name(), "id"))
288 counters.id = it->value();
289 else if (!strcmp (it->name(), "type"))
290 counters.nrel_memb++;
291 else if (!strcmp (it->name(), "k"))
292 counters.nrel_kv++;
293 }
294 // allows for >1 child nodes
295 for (XmlNodePtr it = pt->first_node(); it != nullptr; it = it->next_sibling())
296 {
297 countRelation (it);
298 }
299} // end function XmlDataSC::countRelation
300
301
302/************************************************************************
303 ************************************************************************
304 ** **
305 ** FUNCTION::COUNTWAY **
306 ** **
307 ************************************************************************
308 ************************************************************************/
309
310inline void XmlDataSC::countWay (XmlNodePtr pt)
311{
312 // Ways can have either member nodes, called "ref", or key-val pairs
313 for (XmlAttrPtr it = pt->first_attribute (); it != nullptr;
314 it = it->next_attribute())
315 {
316 if (!strcmp (it->name(), "id"))
317 counters.id = it->value();
318 else if (!strcmp (it->name(), "k"))
319 counters.nway_kv++;
320 else if (!strcmp (it->name(), "ref"))
321 counters.nedges++;
322 }
323 // allows for >1 child nodes
324 for (XmlNodePtr it = pt->first_node(); it != nullptr; it = it->next_sibling())
325 {
326 countWay (it);
327 }
328} // end function XmlDataSC::countWay
329
330
331/************************************************************************
332 ************************************************************************
333 ** **
334 ** FUNCTION::COUNTNODE **
335 ** **
336 ************************************************************************
337 ************************************************************************/
338
339inline void XmlDataSC::countNode (XmlNodePtr pt)
340{
341 for (XmlAttrPtr it = pt->first_attribute (); it != nullptr;
342 it = it->next_attribute())
343 {
344 if (!strcmp (it->name(), "k"))
345 counters.nnode_kv++;
346 }
347 // allows for >1 child nodes
348 for (XmlNodePtr it = pt->first_node(); it != nullptr; it = it->next_sibling())
349 {
350 countNode (it);
351 }
352} // end function XmlDataSC::countNode
353
354
355/************************************************************************
356 ************************************************************************
357 ** **
358 ** FUNCTION::TRAVERSEWAYS **
359 ** **
360 ************************************************************************
361 ************************************************************************/
362
363inline void XmlDataSC::traverseWays (XmlNodePtr pt)
364{
365 for (XmlNodePtr it = pt->first_node (); it != nullptr;
366 it = it->next_sibling())
367 {
368 if (!strcmp (it->name(), "node"))
369 {
370 traverseNode (it);
371 counters.nnodes++;
372 } else if (!strcmp (it->name(), "way"))
373 {
374 size_t node_num = 0;
375 traverseWay (it, node_num);
376 counters.nways++;
377 }
378 else if (!strcmp (it->name(), "relation"))
379 {
380 size_t memb_num = 0;
381 traverseRelation (it, memb_num);
382 counters.nrels++;
383 }
384 else
385 {
386 traverseWays (it);
387 }
388 }
389
390} // end function XmlDataSC::traverseWays
391
392
393/************************************************************************
394 ************************************************************************
395 ** **
396 ** FUNCTION::TRAVERSERELATION **
397 ** **
398 ************************************************************************
399 ************************************************************************/
400
401inline void XmlDataSC::traverseRelation (XmlNodePtr pt, size_t &memb_num)
402{
403 for (XmlAttrPtr it = pt->first_attribute (); it != nullptr;
404 it = it->next_attribute())
405 {
406 if (!strcmp (it->name(), "id"))
407 {
408 // These values are always first, so all other clauses are executed
409 // after this one
410 counters.id = it->value();
411 } else if (!strcmp (it->name(), "k"))
412 {
413 vectors.rel_kv_id [counters.nrel_kv] = counters.id;
414 vectors.rel_key [counters.nrel_kv] = it->value();
415 } else if (!strcmp (it->name(), "v"))
416 vectors.rel_val [counters.nrel_kv++] = it->value();
417 else if (!strcmp (it->name(), "type"))
418 {
419 vectors.rel_memb_type [counters.nrel_memb] = it->value();
420 vectors.rel_memb_id [counters.nrel_memb] = counters.id;
421 } else if (!strcmp (it->name(), "ref"))
422 {
423 vectors.rel_ref [counters.nrel_memb] = it->value();
424 // TODO: Is there a safer alternative to next line?
425 maps.rel_membs.at (counters.id) [memb_num++] = it->value();
426 } else if (!strcmp (it->name(), "role"))
427 vectors.rel_role [counters.nrel_memb++] = it->value();
428 }
429 // allows for >1 child nodes
430 for (XmlNodePtr it = pt->first_node(); it != nullptr; it = it->next_sibling())
431 {
432 traverseRelation (it, memb_num);
433 }
434} // end function XmlDataSC::traverseRelation
435
436
437/************************************************************************
438 ************************************************************************
439 ** **
440 ** FUNCTION::TRAVERSEWAY **
441 ** **
442 ************************************************************************
443 ************************************************************************/
444
445inline void XmlDataSC::traverseWay (XmlNodePtr pt, size_t& node_num)
446{
447 for (XmlAttrPtr it = pt->first_attribute (); it != nullptr;
448 it = it->next_attribute())
449 {
450 if (!strcmp (it->name(), "id"))
451 {
452 // These values are always first, so all other clauses are executed
453 // after this one
454 counters.id = it->value();
455 } else if (!strcmp (it->name(), "k"))
456 {
457 vectors.way_id [counters.nway_kv] = counters.id;
458 vectors.way_key [counters.nway_kv] = it->value();
459 } else if (!strcmp (it->name(), "v"))
460 vectors.way_val [counters.nway_kv++] = it->value();
461 else if (!strcmp (it->name(), "ref"))
462 {
463 maps.way_membs.at (counters.id) [node_num] = it->value();
464 if (node_num == 0)
465 vectors.vx0 [counters.nedges] = it->value();
466 else
467 {
468 vectors.vx1 [counters.nedges] = it->value();
469 vectors.object [counters.nedges] = counters.id;
470 vectors.edge [counters.nedges] = random_id (10);
471 counters.nedges++;
472 if (counters.nedges < vectors.vx0.size ())
473 {
474 vectors.vx0 [counters.nedges] = it->value();
475 }
476 }
477 node_num++;
478 }
479 }
480
481 // allows for >1 child nodes
482 for (XmlNodePtr it = pt->first_node(); it != nullptr; it = it->next_sibling())
483 {
484 traverseWay (it, node_num);
485 }
486} // end function XmlDataSC::traverseWay
487
488
489/************************************************************************
490 ************************************************************************
491 ** **
492 ** FUNCTION::TRAVERSENODE **
493 ** **
494 ************************************************************************
495 ************************************************************************/
496
497inline void XmlDataSC::traverseNode (XmlNodePtr pt)
498{
499 for (XmlAttrPtr it = pt->first_attribute (); it != nullptr;
500 it = it->next_attribute())
501 {
502 if (!strcmp (it->name(), "id"))
503 vectors.vert_id [counters.nnodes] = it->value();
504 else if (!strcmp (it->name(), "lat"))
505 vectors.vy [counters.nnodes] = std::stod(it->value());
506 else if (!strcmp (it->name(), "lon"))
507 vectors.vx [counters.nnodes] = std::stod(it->value());
508 else if (!strcmp (it->name(), "k"))
509 vectors.node_key [counters.nnode_kv] = it->value();
510 else if (!strcmp (it->name(), "v"))
511 {
512 vectors.node_val [counters.nnode_kv] = it->value();
513 vectors.node_id [counters.nnode_kv] =
514 vectors.vert_id [counters.nnodes]; // will always be pre-set
515 counters.nnode_kv++;
516 }
517 }
518 // allows for >1 child nodes
519 for (XmlNodePtr it = pt->first_node(); it != nullptr; it = it->next_sibling())
520 {
521 traverseNode (it);
522 }
523} // end function XmlDataSC::traverseNode
524
525
526/************************************************************************
527 ************************************************************************
528 ** **
529 ** ADDITIONAL FUNCTIONS **
530 ** **
531 ************************************************************************
532 ************************************************************************/
533
534Rcpp::List rel_membs_as_list (XmlDataSC &xml);
535Rcpp::List way_membs_as_list (XmlDataSC &xml);