Distances on Directed Graphs in R
1#' Identify the essential columns of the graph table (data.frame, tibble,
2#' whatever) to be analysed in the C++ routines.
3#'
4#' @param graph A `data.frame` containing the edges of the graph
5#' @return A list of column numbers of `edge_id`, `from`,
6#' `to`, `d`, `w`, `time`, `xfr`, `yfr`, `xto`, `yto`, and `component`, some of
7#' which may be NA.
8#'
9#' @noRd
10dodgr_graph_cols <- function (graph) {
11
12 nms <- names (graph)
13 component <- find_component_col (graph)
14 if (methods::is (graph, "dodgr_streetnet") &&
15 !methods::is (graph, "dodgr_streetnet_sc") &&
16 ncol (graph) >= 11) {
17 # columns are always identically structured
18 edge_id <- which (nms == "edge_id") %>% null_to_na ()
19 fr_col <- which (nms %in% c ("from", "from_id")) %>% null_to_na ()
20 to_col <- which (nms %in% c ("to", "to_id")) %>% null_to_na ()
21 d_col <- which (nms == "d")
22 w_col <- which (nms == "d_weighted")
23
24 xfr <- which (nms %in% c ("xfr", "from_lon"))
25 if (length (xfr) == 0) xfr <- NA
26 yfr <- which (nms %in% c ("yfr", "from_lat"))
27 if (length (yfr) == 0) yfr <- NA
28 xto <- which (nms %in% c ("xto", "to_lon"))
29 if (length (xto) == 0) xto <- NA
30 yto <- which (nms %in% c ("yto", "to_lat"))
31 if (length (yto) == 0) yto <- NA
32 } else {
33 edge_id <- find_edge_id_col (graph)
34
35 d_col <- find_d_col (graph)
36 w_col <- find_w_col (graph)
37 # sc ensures this never happens, so not covered
38 if (length (w_col) == 0) {
39 w_col <- d_col # nocov
40 }
41
42 fr_col <- find_fr_id_col (graph)
43 to_col <- find_to_id_col (graph)
44
45 xfr <- yfr <- xto <- yto <- NA
46 # TODO: Modify for other complex but non-spatial types of graph
47 if (is_graph_spatial (graph)) {
48 spcols <- find_spatial_cols (graph)
49 graph <- tbl_to_df (graph)
50
51 fr_is_num <- vapply (spcols$fr_col, function (i) {
52 is.numeric (graph [[i]])
53 }, logical (1))
54 to_is_num <- vapply (spcols$to_col, function (i) {
55 is.numeric (graph [[i]])
56 }, logical (1))
57 if (!(all (fr_is_num) && all (to_is_num))) {
58 stop (paste0 (
59 "graph appears to have non-numeric ",
60 "longitudes and latitudes"
61 ))
62 }
63
64 xfr <- spcols$fr_col [1]
65 yfr <- spcols$fr_col [2]
66 xto <- spcols$to_col [1]
67 yto <- spcols$to_col [2]
68 } else {
69 if (length (fr_col) != 1 && length (to_col) != 1) {
70 stop ("Unable to determine from and to columns in graph")
71 } # nolint # nocov
72 }
73 }
74
75 time_col <- grep ("time", nms)
76 if (length (time_col) != 1) {
77 time_col <- grep ("time$", nms)
78 if (length (time_col) != 1) {
79 time_col <- NA
80 }
81 }
82 timew_col <- grep ("time_w|timew|tw", nms)
83 if (length (timew_col) != 1) {
84 timew_col <- grep ("time_w|timew|^tw", nms)
85 if (length (timew_col) != 1) {
86 timew_col <- NA
87 }
88 }
89
90 ret <- c (
91 edge_id,
92 fr_col,
93 to_col,
94 d_col,
95 w_col,
96 time_col,
97 timew_col,
98 xfr,
99 yfr,
100 xto,
101 yto,
102 component
103 )
104 names (ret) <- c (
105 "edge_id",
106 "from",
107 "to",
108 "d",
109 "d_weighted",
110 "time",
111 "time_weighted",
112 "xfr",
113 "yfr",
114 "xto",
115 "yto",
116 "component"
117 )
118 class (ret) <- c (class (ret), "graph_columns")
119
120 # This is passed to many C++ routines, in which case it needs to be
121 # converted to a vector (`do.call (c, gr_cols)`), and the R-style 1-indexeso
122 # need to be converted to equivalent 0-indexed forms
123 return (as.list (ret))
124}
125
126#' Convert graph to a standard form suitable for submission to C++ routines
127#'
128#' @noRd
129convert_graph <- function (graph, gr_cols) {
130
131 tp <- NULL
132 if ("turn_penalty" %in% names (attributes (graph))) {
133 tp <- attr (graph, "turn_penalty")
134 }
135
136 keep_cols <- c (
137 "edge_id", "from", "to", "d", "d_weighted",
138 "time", "time_weighted"
139 )
140 index <- do.call (c, gr_cols [keep_cols])
141 index <- index [!is.na (index)]
142 graph <- graph [, index]
143 names (graph) <- names (index)
144
145 if ("edge_id" %in% names (graph)) {
146 graph$edge_id <- convert_to_char (graph$edge_id)
147 }
148 graph$from <- convert_to_char (graph$from)
149 graph$to <- convert_to_char (graph$to)
150
151 if (!"time_weighted" %in% names (graph)) {
152 graph$time_weighted <- graph$time
153 }
154
155 if (!is.null (tp)) {
156 attr (graph, "turn_penalty") <- tp
157 }
158
159 return (graph)
160}
161
162convert_to_char <- function (x) {
163
164 if (!is.character (x)) x <- paste0 (x)
165 return (x)
166}
167
168tbl_to_df <- function (graph) {
169
170 if (methods::is (graph, "tbl")) {
171 classes <- class (graph) [!grepl ("tbl", class (graph))]
172 graph <- as.data.frame (graph)
173 class (graph) <- classes
174 }
175 return (graph)
176}
177
178
179
180#' Extract vertices of graph, including spatial coordinates if included.
181#'
182#' @param graph A flat table of graph edges. Must contain columns labelled
183#' `from` and `to`, or `start` and `stop`. May also contain
184#' similarly labelled columns of spatial coordinates (for example
185#' `from_x`) or `stop_lon`).
186#' @return A `data.frame` of vertices with unique numbers (`n`).
187#'
188#' @note Values of `n` are 0-indexed
189#'
190#' @family misc
191#' @export
192#' @examples
193#' graph <- weight_streetnet (hampi)
194#' v <- dodgr_vertices (graph)
195dodgr_vertices <- function (graph) {
196
197 # vertices are calculated as background process, so wait if that's not
198 # finished.
199 if ("px" %in% names (attributes (graph))) {
200 px <- attr (graph, "px")
201 while (px$is_alive ()) {
202 px$wait ()
203 }
204 }
205
206 hash <- ifelse (methods::is (graph, "dodgr_contracted"),
207 "hashc", "hash"
208 )
209 hash <- attr (graph, hash)
210 # make sure rows of graph have not been changed
211 gr_cols <- dodgr_graph_cols (graph)
212 hashe <- digest::digest (graph [[gr_cols$edge_id]])
213 if (!identical (hashe, hash)) {
214 hash <- NULL
215 }
216 if (!is.null (hash)) {
217 hashe_ref <- attr (graph, "hashe")
218 hashe_ref <- ifelse (is.null (hashe_ref), "", hashe_ref)
219 hashe <- digest::digest (graph [[gr_cols$edge_id]])
220 if (hashe != hashe_ref) {
221 hash <- hashe
222 }
223 } else {
224 if (is.na (gr_cols$edge_id)) {
225 hash <- "" # nocov
226 } else {
227 hash <- get_hash (graph, contracted = FALSE, force = TRUE)
228 }
229 }
230
231 verts_up_to_date <- FALSE
232
233 fname <- fs::path (fs::path_temp (), paste0 ("dodgr_verts_", hash, ".Rds"))
234 if (hash != "" && fs::file_exists (fname)) {
235 verts <- readRDS (fname)
236 # Then double-check to ensure those vertices match current graph:
237 gr_cols <- dodgr_graph_cols (graph)
238 graph_verts <- unique (c (graph [[gr_cols$from]], graph [[gr_cols$to]]))
239 verts_up_to_date <-
240 (all (graph_verts %in% verts$id) && all (verts$id %in% graph_verts))
241 }
242
243 if (!verts_up_to_date) {
244 verts <- dodgr_vertices_internal (graph)
245 saveRDS (verts, fname)
246 }
247
248 return (verts)
249}
250
251dodgr_vertices_internal <- function (graph) {
252
253 graph <- tbl_to_df (graph)
254
255 gr_cols <- dodgr_graph_cols (graph)
256 # cols are (edge_id, from, to, d, w, component, xfr, yfr, xto, yto)
257 # NOTE: c (x, y), where x and y are both factors gives junk, so explicit
258 # conversion required here: TODO: Find a better way?
259 if (is.factor (graph [[gr_cols$from]])) {
260 graph [[gr_cols$from]] <- paste0 (graph [[gr_cols$from]])
261 }
262 if (is.factor (graph [[gr_cols$to]])) {
263 graph [[gr_cols$to]] <- paste0 (graph [[gr_cols$to]])
264 }
265
266 if (is_graph_spatial (graph)) {
267 verts <- data.frame (
268 id = c (
269 graph [[gr_cols$from]],
270 graph [[gr_cols$to]]
271 ),
272 x = c (
273 graph [[gr_cols$xfr]],
274 graph [[gr_cols$xto]]
275 ),
276 y = c (
277 graph [[gr_cols$yfr]],
278 graph [[gr_cols$yto]]
279 ),
280 stringsAsFactors = FALSE
281 )
282 if (!is.na (gr_cols$component)) {
283 verts$component <- graph [[gr_cols$component]]
284 }
285 } else {
286 verts <- data.frame (
287 id = c (
288 graph [[gr_cols$from]],
289 graph [[gr_cols$to]]
290 ),
291 stringsAsFactors = FALSE
292 )
293 if (!is.na (gr_cols$component)) {
294 verts$component <- graph [[gr_cols$component]]
295 }
296 }
297
298 # The next line is the time-killer here, which is why this is cached
299 indx <- which (!duplicated (verts$id))
300 verts <- verts [indx, , drop = FALSE] # nolint
301 verts$n <- seq_len (nrow (verts)) - 1
302
303 return (verts)
304}
305
306
307#' Identify connected components of graph.
308#'
309#' Identify connected components of graph and add corresponding `component`
310#' column to `data.frame`.
311#'
312#' @param graph A `data.frame` of edges
313#' @return Equivalent graph with additional `component` column,
314#' sequentially numbered from 1 = largest component.
315#' @family modification
316#' @export
317#' @examples
318#' graph <- weight_streetnet (hampi)
319#' graph <- dodgr_components (graph)
320dodgr_components <- function (graph) {
321
322 graph <- tbl_to_df (graph)
323
324 if ("component" %in% names (graph)) {
325 message ("graph already has a component column")
326 } else {
327 gr_cols <- dodgr_graph_cols (graph)
328 graph2 <- convert_graph (graph, gr_cols)
329 if (is.na (gr_cols$edge_id)) {
330 graph2$edge_id <- seq_len (nrow (graph2))
331 }
332 cns <- rcpp_get_component_vector (graph2)
333
334 indx <- match (graph2$edge_id, cns$edge_id)
335 component <- cns$edge_component [indx]
336 # Then re-number in order to decreasing component size:
337 graph$component <- match (
338 component,
339 order (table (component), decreasing = TRUE)
340 )
341 }
342
343 return (graph)
344}
345
346#' Sample a random but connected sub-component of a graph
347#'
348#' @param graph A flat table of graph edges. Must contain columns labelled
349#' `from` and `to`, or `start` and `stop`. May also contain
350#' similarly labelled columns of spatial coordinates (for example
351#' `from_x`) or `stop_lon`).
352#' @param nverts Number of vertices to sample
353#'
354#' @return A connected sub-component of `graph`
355#'
356#' @note Graphs may occasionally have `nverts + 1` vertices, rather than
357#' the requested `nverts`.
358#'
359#' @family misc
360#' @export
361#' @examples
362#' graph <- weight_streetnet (hampi)
363#' nrow (graph) # 5,742
364#' graph <- dodgr_sample (graph, nverts = 200)
365#' nrow (graph) # generally around 400 edges
366#' nrow (dodgr_vertices (graph)) # 200
367dodgr_sample <- function (graph, nverts = 1000) {
368
369 graph <- tbl_to_df (graph)
370
371 fr <- find_fr_id_col (graph)
372 to <- find_to_id_col (graph)
373 verts <- unique (c (graph [, fr], graph [, to]))
374 gr_cols <- dodgr_graph_cols (graph)
375 if (is.na (gr_cols$edge_id)) {
376 graph$edge_id <- seq_len (nrow (graph))
377 gr_cols <- dodgr_graph_cols (graph)
378 }
379
380 if (length (verts) > nverts) {
381 graph2 <- convert_graph (graph, gr_cols)
382 if ("component" %in% names (graph)) {
383 graph2$component <- graph$component
384 }
385 indx <- match (rcpp_sample_graph (graph2, nverts), graph2$edge_id)
386 graph <- graph [sort (indx), ]
387 }
388
389 attr (graph, "hash") <- get_hash (graph, contracted = FALSE, force = TRUE)
390
391 return (graph)
392}
393
394#' Insert a new node or vertex into a network
395#'
396#' @param v1 Vertex defining start of graph edge along which new vertex is to be
397#' inserted
398#' @param v2 Vertex defining end of graph edge along which new vertex is to be
399#' inserted (order of `v1` and `v2` is not important).
400#' @param x The `x`-coordinate of new vertex. If not specified, vertex is
401#' created half-way between `v1` and `v2`.
402#' @param y The `y`-coordinate of new vertex. If not specified, vertex is
403#' created half-way between `v1` and `v2`.
404#' @return A modified graph with specified edge between defined start and end
405#' vertices split into two edges either side of new vertex.
406#' @inheritParams dodgr_vertices
407#'
408#' @family misc
409#' @export
410#' @examples
411#' graph <- weight_streetnet (hampi)
412#' e1 <- sample (nrow (graph), 1)
413#' v1 <- graph$from_id [e1]
414#' v2 <- graph$to_id [e1]
415#' # insert new vertex in the middle of that randomly-selected edge:
416#' graph2 <- dodgr_insert_vertex (graph, v1, v2)
417#' nrow (graph)
418#' nrow (graph2) # new edges added to graph2
419dodgr_insert_vertex <- function (graph, v1, v2, x = NULL, y = NULL) {
420
421 graph_t <- tbl_to_df (graph)
422 gr_cols <- dodgr_graph_cols (graph_t)
423 index12 <- which (graph [[gr_cols$from]] == v1 & graph [[gr_cols$to]] == v2)
424 index21 <- which (graph [[gr_cols$from]] == v2 & graph [[gr_cols$to]] == v1)
425 if (length (index12) == 0 && length (index21) == 0) {
426 stop ("Nominated vertices do not define any edges in graph")
427 }
428 if ((!is.null (x) && is.null (y)) || (is.null (x) && !is.null (y))) {
429 stop ("Either both x and y must be NULL, or both must be specified")
430 }
431
432
433 charvec <- c (letters, LETTERS, 0:9)
434 randid <- function (charvec, len = 10) {
435 paste0 (sample (charvec, len, replace = TRUE), collapse = "")
436 }
437
438 if (length (index12) == 1) {
439 graph <- insert_one_edge (graph, index12, x, y, gr_cols)
440 graph [index12, gr_cols$to] <-
441 graph [index12 + 1, gr_cols$from] <- randid (charvec, 10)
442 index21 <- which (graph [[gr_cols$from]] == v2 &
443 graph [[gr_cols$to]] == v1)
444 }
445 if (length (index21) == 1) {
446 graph <- insert_one_edge (graph, index21, x, y, gr_cols)
447 graph [index21, gr_cols$to] <-
448 graph [index21 + 1, gr_cols$from] <- randid (charvec, 10)
449 }
450
451 attr (graph, "hash") <- get_hash (graph, contracted = FALSE, force = TRUE)
452
453 return (graph)
454}
455
456insert_one_edge <- function (graph, index, x, y, gr_cols) {
457
458 if (is.null (x) && is.null (y)) {
459 x <- (graph [[gr_cols$xfr]] [index] +
460 graph [[gr_cols$xto]] [index]) / 2
461 y <- (graph [[gr_cols$yfr]] [index] +
462 graph [[gr_cols$yto]] [index]) / 2
463 }
464 expand_index <- c (
465 seq_len (index),
466 index,
467 seq_len (nrow (graph)) [-seq_len (index)]
468 )
469
470 graph <- graph [expand_index, ]
471 graph [index, gr_cols$xto] <- x
472 graph [index, gr_cols$yto] <- y
473 graph [index + 1, gr_cols$xfr] <- x
474 graph [index + 1, gr_cols$yfr] <- y
475
476 xy1 <- c (
477 x = graph [[gr_cols$xfr]] [index],
478 y = graph [[gr_cols$yfr]] [index]
479 )
480 xy2 <- c (
481 x = graph [[gr_cols$xto]] [index + 1],
482 y = graph [[gr_cols$yto]] [index + 1]
483 )
484 if (is_graph_spatial (graph)) {
485 # Get geodist measure, noting that graph has no hash at this stage, so
486 # full calculation will be executed.
487 measure <- get_geodist_measure (graph)
488
489 d1 <- geodist::geodist (xy1, c (x = x, y = y), measure = measure)
490 d2 <- geodist::geodist (xy2, c (x = x, y = y), measure = measure)
491 } else {
492 d1 <- sqrt ((xy1 [1] - x)^2 + (xy1 [2] - y)^2)
493 d2 <- sqrt ((x - xy2 [1])^2 + (y - xy2 [2])^2)
494 }
495 wt <- graph [index, gr_cols$d_weighted] /
496 graph [index, gr_cols$d]
497 if (!is.na (gr_cols$time)) {
498 time_scale <- graph [index, gr_cols$time] /
499 graph [index, gr_cols$d]
500 time_wt <- graph [index, gr_cols$time_weighted] /
501 graph [index, gr_cols$time]
502 }
503 graph [index, gr_cols$d] <- d1
504 graph [index, gr_cols$d_weighted] <- d1 * wt
505 graph [index + 1, gr_cols$d] <- d2
506 graph [index + 1, gr_cols$d_weighted] <- d2 * wt
507
508 if (!is.na (gr_cols$time)) {
509 graph [index, gr_cols$time] <- d1 * time_scale
510 graph [index + 1, gr_cols$time] <- d2 * time_scale
511 graph [index, gr_cols$time_weighted] <- d1 * time_scale * time_wt
512 graph [index + 1, gr_cols$time_weighted] <- d2 * time_scale * time_wt
513 }
514
515 graph$edge_id [index] <- paste0 (graph$edge_id [index], "_a")
516 graph$edge_id [index + 1] <- paste0 (graph$edge_id [index + 1], "_b")
517
518 return (graph)
519}