Distances on Directed Graphs in R
at main 519 lines 16 kB view raw
1#' Identify the essential columns of the graph table (data.frame, tibble, 2#' whatever) to be analysed in the C++ routines. 3#' 4#' @param graph A `data.frame` containing the edges of the graph 5#' @return A list of column numbers of `edge_id`, `from`, 6#' `to`, `d`, `w`, `time`, `xfr`, `yfr`, `xto`, `yto`, and `component`, some of 7#' which may be NA. 8#' 9#' @noRd 10dodgr_graph_cols <- function (graph) { 11 12 nms <- names (graph) 13 component <- find_component_col (graph) 14 if (methods::is (graph, "dodgr_streetnet") && 15 !methods::is (graph, "dodgr_streetnet_sc") && 16 ncol (graph) >= 11) { 17 # columns are always identically structured 18 edge_id <- which (nms == "edge_id") %>% null_to_na () 19 fr_col <- which (nms %in% c ("from", "from_id")) %>% null_to_na () 20 to_col <- which (nms %in% c ("to", "to_id")) %>% null_to_na () 21 d_col <- which (nms == "d") 22 w_col <- which (nms == "d_weighted") 23 24 xfr <- which (nms %in% c ("xfr", "from_lon")) 25 if (length (xfr) == 0) xfr <- NA 26 yfr <- which (nms %in% c ("yfr", "from_lat")) 27 if (length (yfr) == 0) yfr <- NA 28 xto <- which (nms %in% c ("xto", "to_lon")) 29 if (length (xto) == 0) xto <- NA 30 yto <- which (nms %in% c ("yto", "to_lat")) 31 if (length (yto) == 0) yto <- NA 32 } else { 33 edge_id <- find_edge_id_col (graph) 34 35 d_col <- find_d_col (graph) 36 w_col <- find_w_col (graph) 37 # sc ensures this never happens, so not covered 38 if (length (w_col) == 0) { 39 w_col <- d_col # nocov 40 } 41 42 fr_col <- find_fr_id_col (graph) 43 to_col <- find_to_id_col (graph) 44 45 xfr <- yfr <- xto <- yto <- NA 46 # TODO: Modify for other complex but non-spatial types of graph 47 if (is_graph_spatial (graph)) { 48 spcols <- find_spatial_cols (graph) 49 graph <- tbl_to_df (graph) 50 51 fr_is_num <- vapply (spcols$fr_col, function (i) { 52 is.numeric (graph [[i]]) 53 }, logical (1)) 54 to_is_num <- vapply (spcols$to_col, function (i) { 55 is.numeric (graph [[i]]) 56 }, logical (1)) 57 if (!(all (fr_is_num) && all (to_is_num))) { 58 stop (paste0 ( 59 "graph appears to have non-numeric ", 60 "longitudes and latitudes" 61 )) 62 } 63 64 xfr <- spcols$fr_col [1] 65 yfr <- spcols$fr_col [2] 66 xto <- spcols$to_col [1] 67 yto <- spcols$to_col [2] 68 } else { 69 if (length (fr_col) != 1 && length (to_col) != 1) { 70 stop ("Unable to determine from and to columns in graph") 71 } # nolint # nocov 72 } 73 } 74 75 time_col <- grep ("time", nms) 76 if (length (time_col) != 1) { 77 time_col <- grep ("time$", nms) 78 if (length (time_col) != 1) { 79 time_col <- NA 80 } 81 } 82 timew_col <- grep ("time_w|timew|tw", nms) 83 if (length (timew_col) != 1) { 84 timew_col <- grep ("time_w|timew|^tw", nms) 85 if (length (timew_col) != 1) { 86 timew_col <- NA 87 } 88 } 89 90 ret <- c ( 91 edge_id, 92 fr_col, 93 to_col, 94 d_col, 95 w_col, 96 time_col, 97 timew_col, 98 xfr, 99 yfr, 100 xto, 101 yto, 102 component 103 ) 104 names (ret) <- c ( 105 "edge_id", 106 "from", 107 "to", 108 "d", 109 "d_weighted", 110 "time", 111 "time_weighted", 112 "xfr", 113 "yfr", 114 "xto", 115 "yto", 116 "component" 117 ) 118 class (ret) <- c (class (ret), "graph_columns") 119 120 # This is passed to many C++ routines, in which case it needs to be 121 # converted to a vector (`do.call (c, gr_cols)`), and the R-style 1-indexeso 122 # need to be converted to equivalent 0-indexed forms 123 return (as.list (ret)) 124} 125 126#' Convert graph to a standard form suitable for submission to C++ routines 127#' 128#' @noRd 129convert_graph <- function (graph, gr_cols) { 130 131 tp <- NULL 132 if ("turn_penalty" %in% names (attributes (graph))) { 133 tp <- attr (graph, "turn_penalty") 134 } 135 136 keep_cols <- c ( 137 "edge_id", "from", "to", "d", "d_weighted", 138 "time", "time_weighted" 139 ) 140 index <- do.call (c, gr_cols [keep_cols]) 141 index <- index [!is.na (index)] 142 graph <- graph [, index] 143 names (graph) <- names (index) 144 145 if ("edge_id" %in% names (graph)) { 146 graph$edge_id <- convert_to_char (graph$edge_id) 147 } 148 graph$from <- convert_to_char (graph$from) 149 graph$to <- convert_to_char (graph$to) 150 151 if (!"time_weighted" %in% names (graph)) { 152 graph$time_weighted <- graph$time 153 } 154 155 if (!is.null (tp)) { 156 attr (graph, "turn_penalty") <- tp 157 } 158 159 return (graph) 160} 161 162convert_to_char <- function (x) { 163 164 if (!is.character (x)) x <- paste0 (x) 165 return (x) 166} 167 168tbl_to_df <- function (graph) { 169 170 if (methods::is (graph, "tbl")) { 171 classes <- class (graph) [!grepl ("tbl", class (graph))] 172 graph <- as.data.frame (graph) 173 class (graph) <- classes 174 } 175 return (graph) 176} 177 178 179 180#' Extract vertices of graph, including spatial coordinates if included. 181#' 182#' @param graph A flat table of graph edges. Must contain columns labelled 183#' `from` and `to`, or `start` and `stop`. May also contain 184#' similarly labelled columns of spatial coordinates (for example 185#' `from_x`) or `stop_lon`). 186#' @return A `data.frame` of vertices with unique numbers (`n`). 187#' 188#' @note Values of `n` are 0-indexed 189#' 190#' @family misc 191#' @export 192#' @examples 193#' graph <- weight_streetnet (hampi) 194#' v <- dodgr_vertices (graph) 195dodgr_vertices <- function (graph) { 196 197 # vertices are calculated as background process, so wait if that's not 198 # finished. 199 if ("px" %in% names (attributes (graph))) { 200 px <- attr (graph, "px") 201 while (px$is_alive ()) { 202 px$wait () 203 } 204 } 205 206 hash <- ifelse (methods::is (graph, "dodgr_contracted"), 207 "hashc", "hash" 208 ) 209 hash <- attr (graph, hash) 210 # make sure rows of graph have not been changed 211 gr_cols <- dodgr_graph_cols (graph) 212 hashe <- digest::digest (graph [[gr_cols$edge_id]]) 213 if (!identical (hashe, hash)) { 214 hash <- NULL 215 } 216 if (!is.null (hash)) { 217 hashe_ref <- attr (graph, "hashe") 218 hashe_ref <- ifelse (is.null (hashe_ref), "", hashe_ref) 219 hashe <- digest::digest (graph [[gr_cols$edge_id]]) 220 if (hashe != hashe_ref) { 221 hash <- hashe 222 } 223 } else { 224 if (is.na (gr_cols$edge_id)) { 225 hash <- "" # nocov 226 } else { 227 hash <- get_hash (graph, contracted = FALSE, force = TRUE) 228 } 229 } 230 231 verts_up_to_date <- FALSE 232 233 fname <- fs::path (fs::path_temp (), paste0 ("dodgr_verts_", hash, ".Rds")) 234 if (hash != "" && fs::file_exists (fname)) { 235 verts <- readRDS (fname) 236 # Then double-check to ensure those vertices match current graph: 237 gr_cols <- dodgr_graph_cols (graph) 238 graph_verts <- unique (c (graph [[gr_cols$from]], graph [[gr_cols$to]])) 239 verts_up_to_date <- 240 (all (graph_verts %in% verts$id) && all (verts$id %in% graph_verts)) 241 } 242 243 if (!verts_up_to_date) { 244 verts <- dodgr_vertices_internal (graph) 245 saveRDS (verts, fname) 246 } 247 248 return (verts) 249} 250 251dodgr_vertices_internal <- function (graph) { 252 253 graph <- tbl_to_df (graph) 254 255 gr_cols <- dodgr_graph_cols (graph) 256 # cols are (edge_id, from, to, d, w, component, xfr, yfr, xto, yto) 257 # NOTE: c (x, y), where x and y are both factors gives junk, so explicit 258 # conversion required here: TODO: Find a better way? 259 if (is.factor (graph [[gr_cols$from]])) { 260 graph [[gr_cols$from]] <- paste0 (graph [[gr_cols$from]]) 261 } 262 if (is.factor (graph [[gr_cols$to]])) { 263 graph [[gr_cols$to]] <- paste0 (graph [[gr_cols$to]]) 264 } 265 266 if (is_graph_spatial (graph)) { 267 verts <- data.frame ( 268 id = c ( 269 graph [[gr_cols$from]], 270 graph [[gr_cols$to]] 271 ), 272 x = c ( 273 graph [[gr_cols$xfr]], 274 graph [[gr_cols$xto]] 275 ), 276 y = c ( 277 graph [[gr_cols$yfr]], 278 graph [[gr_cols$yto]] 279 ), 280 stringsAsFactors = FALSE 281 ) 282 if (!is.na (gr_cols$component)) { 283 verts$component <- graph [[gr_cols$component]] 284 } 285 } else { 286 verts <- data.frame ( 287 id = c ( 288 graph [[gr_cols$from]], 289 graph [[gr_cols$to]] 290 ), 291 stringsAsFactors = FALSE 292 ) 293 if (!is.na (gr_cols$component)) { 294 verts$component <- graph [[gr_cols$component]] 295 } 296 } 297 298 # The next line is the time-killer here, which is why this is cached 299 indx <- which (!duplicated (verts$id)) 300 verts <- verts [indx, , drop = FALSE] # nolint 301 verts$n <- seq_len (nrow (verts)) - 1 302 303 return (verts) 304} 305 306 307#' Identify connected components of graph. 308#' 309#' Identify connected components of graph and add corresponding `component` 310#' column to `data.frame`. 311#' 312#' @param graph A `data.frame` of edges 313#' @return Equivalent graph with additional `component` column, 314#' sequentially numbered from 1 = largest component. 315#' @family modification 316#' @export 317#' @examples 318#' graph <- weight_streetnet (hampi) 319#' graph <- dodgr_components (graph) 320dodgr_components <- function (graph) { 321 322 graph <- tbl_to_df (graph) 323 324 if ("component" %in% names (graph)) { 325 message ("graph already has a component column") 326 } else { 327 gr_cols <- dodgr_graph_cols (graph) 328 graph2 <- convert_graph (graph, gr_cols) 329 if (is.na (gr_cols$edge_id)) { 330 graph2$edge_id <- seq_len (nrow (graph2)) 331 } 332 cns <- rcpp_get_component_vector (graph2) 333 334 indx <- match (graph2$edge_id, cns$edge_id) 335 component <- cns$edge_component [indx] 336 # Then re-number in order to decreasing component size: 337 graph$component <- match ( 338 component, 339 order (table (component), decreasing = TRUE) 340 ) 341 } 342 343 return (graph) 344} 345 346#' Sample a random but connected sub-component of a graph 347#' 348#' @param graph A flat table of graph edges. Must contain columns labelled 349#' `from` and `to`, or `start` and `stop`. May also contain 350#' similarly labelled columns of spatial coordinates (for example 351#' `from_x`) or `stop_lon`). 352#' @param nverts Number of vertices to sample 353#' 354#' @return A connected sub-component of `graph` 355#' 356#' @note Graphs may occasionally have `nverts + 1` vertices, rather than 357#' the requested `nverts`. 358#' 359#' @family misc 360#' @export 361#' @examples 362#' graph <- weight_streetnet (hampi) 363#' nrow (graph) # 5,742 364#' graph <- dodgr_sample (graph, nverts = 200) 365#' nrow (graph) # generally around 400 edges 366#' nrow (dodgr_vertices (graph)) # 200 367dodgr_sample <- function (graph, nverts = 1000) { 368 369 graph <- tbl_to_df (graph) 370 371 fr <- find_fr_id_col (graph) 372 to <- find_to_id_col (graph) 373 verts <- unique (c (graph [, fr], graph [, to])) 374 gr_cols <- dodgr_graph_cols (graph) 375 if (is.na (gr_cols$edge_id)) { 376 graph$edge_id <- seq_len (nrow (graph)) 377 gr_cols <- dodgr_graph_cols (graph) 378 } 379 380 if (length (verts) > nverts) { 381 graph2 <- convert_graph (graph, gr_cols) 382 if ("component" %in% names (graph)) { 383 graph2$component <- graph$component 384 } 385 indx <- match (rcpp_sample_graph (graph2, nverts), graph2$edge_id) 386 graph <- graph [sort (indx), ] 387 } 388 389 attr (graph, "hash") <- get_hash (graph, contracted = FALSE, force = TRUE) 390 391 return (graph) 392} 393 394#' Insert a new node or vertex into a network 395#' 396#' @param v1 Vertex defining start of graph edge along which new vertex is to be 397#' inserted 398#' @param v2 Vertex defining end of graph edge along which new vertex is to be 399#' inserted (order of `v1` and `v2` is not important). 400#' @param x The `x`-coordinate of new vertex. If not specified, vertex is 401#' created half-way between `v1` and `v2`. 402#' @param y The `y`-coordinate of new vertex. If not specified, vertex is 403#' created half-way between `v1` and `v2`. 404#' @return A modified graph with specified edge between defined start and end 405#' vertices split into two edges either side of new vertex. 406#' @inheritParams dodgr_vertices 407#' 408#' @family misc 409#' @export 410#' @examples 411#' graph <- weight_streetnet (hampi) 412#' e1 <- sample (nrow (graph), 1) 413#' v1 <- graph$from_id [e1] 414#' v2 <- graph$to_id [e1] 415#' # insert new vertex in the middle of that randomly-selected edge: 416#' graph2 <- dodgr_insert_vertex (graph, v1, v2) 417#' nrow (graph) 418#' nrow (graph2) # new edges added to graph2 419dodgr_insert_vertex <- function (graph, v1, v2, x = NULL, y = NULL) { 420 421 graph_t <- tbl_to_df (graph) 422 gr_cols <- dodgr_graph_cols (graph_t) 423 index12 <- which (graph [[gr_cols$from]] == v1 & graph [[gr_cols$to]] == v2) 424 index21 <- which (graph [[gr_cols$from]] == v2 & graph [[gr_cols$to]] == v1) 425 if (length (index12) == 0 && length (index21) == 0) { 426 stop ("Nominated vertices do not define any edges in graph") 427 } 428 if ((!is.null (x) && is.null (y)) || (is.null (x) && !is.null (y))) { 429 stop ("Either both x and y must be NULL, or both must be specified") 430 } 431 432 433 charvec <- c (letters, LETTERS, 0:9) 434 randid <- function (charvec, len = 10) { 435 paste0 (sample (charvec, len, replace = TRUE), collapse = "") 436 } 437 438 if (length (index12) == 1) { 439 graph <- insert_one_edge (graph, index12, x, y, gr_cols) 440 graph [index12, gr_cols$to] <- 441 graph [index12 + 1, gr_cols$from] <- randid (charvec, 10) 442 index21 <- which (graph [[gr_cols$from]] == v2 & 443 graph [[gr_cols$to]] == v1) 444 } 445 if (length (index21) == 1) { 446 graph <- insert_one_edge (graph, index21, x, y, gr_cols) 447 graph [index21, gr_cols$to] <- 448 graph [index21 + 1, gr_cols$from] <- randid (charvec, 10) 449 } 450 451 attr (graph, "hash") <- get_hash (graph, contracted = FALSE, force = TRUE) 452 453 return (graph) 454} 455 456insert_one_edge <- function (graph, index, x, y, gr_cols) { 457 458 if (is.null (x) && is.null (y)) { 459 x <- (graph [[gr_cols$xfr]] [index] + 460 graph [[gr_cols$xto]] [index]) / 2 461 y <- (graph [[gr_cols$yfr]] [index] + 462 graph [[gr_cols$yto]] [index]) / 2 463 } 464 expand_index <- c ( 465 seq_len (index), 466 index, 467 seq_len (nrow (graph)) [-seq_len (index)] 468 ) 469 470 graph <- graph [expand_index, ] 471 graph [index, gr_cols$xto] <- x 472 graph [index, gr_cols$yto] <- y 473 graph [index + 1, gr_cols$xfr] <- x 474 graph [index + 1, gr_cols$yfr] <- y 475 476 xy1 <- c ( 477 x = graph [[gr_cols$xfr]] [index], 478 y = graph [[gr_cols$yfr]] [index] 479 ) 480 xy2 <- c ( 481 x = graph [[gr_cols$xto]] [index + 1], 482 y = graph [[gr_cols$yto]] [index + 1] 483 ) 484 if (is_graph_spatial (graph)) { 485 # Get geodist measure, noting that graph has no hash at this stage, so 486 # full calculation will be executed. 487 measure <- get_geodist_measure (graph) 488 489 d1 <- geodist::geodist (xy1, c (x = x, y = y), measure = measure) 490 d2 <- geodist::geodist (xy2, c (x = x, y = y), measure = measure) 491 } else { 492 d1 <- sqrt ((xy1 [1] - x)^2 + (xy1 [2] - y)^2) 493 d2 <- sqrt ((x - xy2 [1])^2 + (y - xy2 [2])^2) 494 } 495 wt <- graph [index, gr_cols$d_weighted] / 496 graph [index, gr_cols$d] 497 if (!is.na (gr_cols$time)) { 498 time_scale <- graph [index, gr_cols$time] / 499 graph [index, gr_cols$d] 500 time_wt <- graph [index, gr_cols$time_weighted] / 501 graph [index, gr_cols$time] 502 } 503 graph [index, gr_cols$d] <- d1 504 graph [index, gr_cols$d_weighted] <- d1 * wt 505 graph [index + 1, gr_cols$d] <- d2 506 graph [index + 1, gr_cols$d_weighted] <- d2 * wt 507 508 if (!is.na (gr_cols$time)) { 509 graph [index, gr_cols$time] <- d1 * time_scale 510 graph [index + 1, gr_cols$time] <- d2 * time_scale 511 graph [index, gr_cols$time_weighted] <- d1 * time_scale * time_wt 512 graph [index + 1, gr_cols$time_weighted] <- d2 * time_scale * time_wt 513 } 514 515 graph$edge_id [index] <- paste0 (graph$edge_id [index], "_a") 516 graph$edge_id [index + 1] <- paste0 (graph$edge_id [index + 1], "_b") 517 518 return (graph) 519}