Git fork
at reftables-rust 655 lines 17 kB view raw
/*
 * Copyright 2020 Google LLC
 *
 * Use of this source code is governed by a BSD-style
 * license that can be found in the LICENSE file or at
 * https://developers.google.com/open-source/licenses/bsd
 */

#include "block.h"

#include "blocksource.h"
#include "constants.h"
#include "iter.h"
#include "record.h"
#include "reftable-error.h"
#include "system.h"

/*
 * Size in bytes of a reftable file header for the given format version.
 * Unknown versions are a programming error, hence the abort().
 */
size_t header_size(int version)
{
	switch (version) {
	case 1:
		return 24;
	case 2:
		return 28;
	}
	abort();
}

/*
 * Size in bytes of a reftable file footer for the given format version.
 * Unknown versions are a programming error, hence the abort().
 */
size_t footer_size(int version)
{
	switch (version) {
	case 1:
		return 68;
	case 2:
		return 72;
	}
	abort();
}

/*
 * Account for a record of encoded size `n` that has just been written at
 * `w->next`, optionally registering it as a restart point. Returns 0 on
 * success, REFTABLE_ENTRY_TOO_BIG_ERROR when the record plus the
 * (eventual) restart table would not fit into the block, or another
 * negative error code on failure.
 */
static int block_writer_register_restart(struct block_writer *w, int n,
					 int is_restart, struct reftable_buf *key)
{
	uint32_t rlen;
	int err;

	rlen = w->restart_len;
	/* The restart count is serialized as a 16-bit field; stop adding
	 * restart points once the format's limit is reached. */
	if (rlen >= MAX_RESTARTS)
		is_restart = 0;

	if (is_restart)
		rlen++;
	/*
	 * The block must still have room for the record itself plus the
	 * trailing restart table: 3 bytes per restart offset and a 2-byte
	 * restart count (see block_writer_finish()).
	 */
	if (2 + 3 * rlen + n > w->block_size - w->next)
		return REFTABLE_ENTRY_TOO_BIG_ERROR;
	if (is_restart) {
		REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1,
					    w->restart_cap);
		if (!w->restarts)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		/* Restart offsets point at the record we just encoded. */
		w->restarts[w->restart_len++] = w->next;
	}

	w->next += n;

	/* Remember the full key so the next record can be prefix-compressed
	 * against it. */
	reftable_buf_reset(&w->last_key);
	err = reftable_buf_add(&w->last_key, key->buf, key->len);
	if (err < 0)
		return err;

	w->entries++;
	return 0;
}

/*
 * Prepare `bw` for writing a block of type `typ` into the caller-owned
 * buffer `block`. `header_off` is nonzero only for the first block of a
 * file, where the file header precedes the block. The zlib stream used
 * for log blocks is allocated lazily on first init and reused afterwards.
 * Returns 0 on success or REFTABLE_OUT_OF_MEMORY_ERROR.
 */
int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
		      uint32_t block_size, uint32_t header_off, uint32_t hash_size)
{
	bw->block = block;
	bw->hash_size = hash_size;
	bw->block_size = block_size;
	bw->header_off = header_off;
	bw->block[header_off] = typ;
	/* Skip the 4-byte block header: 1 type byte + 3-byte block length. */
	bw->next = header_off + 4;
	bw->restart_interval = 16;
	bw->entries = 0;
	bw->restart_len = 0;
	bw->last_key.len = 0;
	if (!bw->zstream) {
		REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
		if (!bw->zstream)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		/*
		 * Level 9 trades CPU for maximum compression of log blocks.
		 * NOTE(review): the deflateInit() return value is not
		 * checked here; failure would surface later via
		 * deflateReset()/deflate() in block_writer_finish().
		 */
		deflateInit(bw->zstream, 9);
	}

	return 0;
}

/* Return the type byte of the block being written. */
uint8_t block_writer_type(struct block_writer *bw)
{
	return bw->block[bw->header_off];
}

/*
 * Adds the reftable_record to the block. Returns 0 on success and
 * appropriate error codes on failure.
 */
int block_writer_add(struct block_writer *w, struct reftable_record *rec)
{
	struct reftable_buf empty = REFTABLE_BUF_INIT;
	/*
	 * Every `restart_interval`-th record becomes a restart point and is
	 * stored with its full key; passing the empty buffer as the
	 * previous key disables prefix compression for it.
	 */
	struct reftable_buf last =
		w->entries % w->restart_interval == 0 ? empty : w->last_key;
	struct string_view out = {
		.buf = w->block + w->next,
		.len = w->block_size - w->next,
	};
	struct string_view start = out;
	int is_restart = 0;
	int n = 0;
	int err;

	err = reftable_record_key(rec, &w->scratch);
	if (err < 0)
		goto done;

	/* Records must have a non-empty key. */
	if (!w->scratch.len) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	n = reftable_encode_key(&is_restart, out, last, w->scratch,
				reftable_record_val_type(rec));
	if (n < 0) {
		err = n;
		goto done;
	}
	string_view_consume(&out, n);

	n = reftable_record_encode(rec, out, w->hash_size);
	if (n < 0) {
		err = n;
		goto done;
	}
	string_view_consume(&out, n);

	/* Total encoded size is how far `out` advanced from `start`. */
	err = block_writer_register_restart(w, start.len - out.len, is_restart,
					    &w->scratch);
done:
	return err;
}

/*
 * Finalize the block: append the restart table and restart count, record
 * the block length in the header, and zlib-compress log blocks in place.
 * Returns the number of bytes used in the block buffer, or a negative
 * error code.
 */
int block_writer_finish(struct block_writer *w)
{
	/* Restart offsets are stored as big-endian 24-bit values... */
	for (uint32_t i = 0; i < w->restart_len; i++) {
		reftable_put_be24(w->block + w->next, w->restarts[i]);
		w->next += 3;
	}

	/* ...followed by the 16-bit restart count. */
	reftable_put_be16(w->block + w->next, w->restart_len);
	w->next += 2;
	/* Block length lives in the 3 bytes right after the type byte. */
	reftable_put_be24(w->block + 1 + w->header_off, w->next);

	/*
	 * Log records are stored zlib-compressed. Note that the compression
	 * also spans over the restart points we have just written.
	 */
	if (block_writer_type(w) == REFTABLE_BLOCK_TYPE_LOG) {
		/* The 4-byte block header (and file header, if any) stays
		 * uncompressed. */
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip, compressed_len;
		int ret;

		ret = deflateReset(w->zstream);
		if (ret != Z_OK)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Precompute the upper bound of how many bytes the compressed
		 * data may end up with. Combined with `Z_FINISH`, `deflate()`
		 * is guaranteed to return `Z_STREAM_END`.
		 */
		compressed_len = deflateBound(w->zstream, src_len);
		REFTABLE_ALLOC_GROW_OR_NULL(w->compressed, compressed_len,
					    w->compressed_cap);
		if (!w->compressed) {
			ret = REFTABLE_OUT_OF_MEMORY_ERROR;
			return ret;
		}

		w->zstream->next_out = w->compressed;
		w->zstream->avail_out = compressed_len;
		w->zstream->next_in = w->block + block_header_skip;
		w->zstream->avail_in = src_len;

		/*
		 * We want to perform all compression in a single step, which
		 * is why we can pass Z_FINISH here. As we have precomputed the
		 * deflated buffer's size via `deflateBound()` this function is
		 * guaranteed to succeed according to the zlib documentation.
		 */
		ret = deflate(w->zstream, Z_FINISH);
		if (ret != Z_STREAM_END)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Overwrite the uncompressed data we have already written and
		 * adjust the `next` pointer to point right after the
		 * compressed data.
		 */
		memcpy(w->block + block_header_skip, w->compressed,
		       w->zstream->total_out);
		w->next = w->zstream->total_out + block_header_skip;
	}

	return w->next;
}

/*
 * Read `sz` bytes at `off` from `source` into `dest`, clamping the read
 * to the source's size. Returns the number of bytes read (0 when `off`
 * is past the end) or a negative error code.
 */
static int read_block(struct reftable_block_source *source,
		      struct reftable_block_data *dest, uint64_t off,
		      uint32_t sz)
{
	size_t size = block_source_size(source);
	block_source_release_data(dest);
	if (off >= size)
		return 0;
	if (off + sz > size)
		sz = size - off;
	return block_source_read_data(source, dest, off, sz);
}

/*
 * Read and decode the block at `offset` from `source`, inflating log
 * blocks into `block->uncompressed_data`. `table_block_size` is the
 * table-wide block size (0 when unknown, e.g. for index blocks), and
 * `header_size` is nonzero only for the first block, which is preceded by
 * the file header. Returns 0 on success, a positive value when the block
 * type does not match `want_type`, or a negative error code (in which
 * case the block is released).
 */
int reftable_block_init(struct reftable_block *block,
			struct reftable_block_source *source,
			uint32_t offset, uint32_t header_size,
			uint32_t table_block_size, uint32_t hash_size,
			uint8_t want_type)
{
	uint32_t guess_block_size = table_block_size ?
		table_block_size : DEFAULT_BLOCK_SIZE;
	uint32_t full_block_size = table_block_size;
	uint16_t restart_count;
	uint32_t restart_off;
	uint32_t block_size;
	uint8_t block_type;
	int err;

	/* First read with a guessed size; re-read below if it was short. */
	err = read_block(source, &block->block_data, offset, guess_block_size);
	if (err < 0)
		goto done;

	block_type = block->block_data.data[header_size];
	if (!reftable_is_block_type(block_type)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}
	/* NOTE(review): `err = 1` is a positive "wrong type" sentinel, not
	 * an error; callers are expected to distinguish it from < 0. */
	if (want_type != REFTABLE_BLOCK_TYPE_ANY && block_type != want_type) {
		err = 1;
		goto done;
	}

	/* The 3 bytes after the type byte hold the block's size. */
	block_size = reftable_get_be24(block->block_data.data + header_size + 1);
	if (block_size > guess_block_size) {
		err = read_block(source, &block->block_data, offset, block_size);
		if (err < 0)
			goto done;
	}

	if (block_type == REFTABLE_BLOCK_TYPE_LOG) {
		uint32_t block_header_skip = 4 + header_size;
		uLong dst_len = block_size - block_header_skip;
		uLong src_len = block->block_data.len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_GROW_OR_NULL(block->uncompressed_data, block_size,
					    block->uncompressed_cap);
		if (!block->uncompressed_data) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto done;
		}

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(block->uncompressed_data, block->block_data.data, block_header_skip);

		/* The inflate stream is allocated lazily and reused across
		 * blocks via inflateReset(). */
		if (!block->zstream) {
			REFTABLE_CALLOC_ARRAY(block->zstream, 1);
			if (!block->zstream) {
				err = REFTABLE_OUT_OF_MEMORY_ERROR;
				goto done;
			}

			err = inflateInit(block->zstream);
		} else {
			err = inflateReset(block->zstream);
		}
		if (err != Z_OK) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		block->zstream->next_in = block->block_data.data + block_header_skip;
		block->zstream->avail_in = src_len;
		block->zstream->next_out = block->uncompressed_data + block_header_skip;
		block->zstream->avail_out = dst_len;

		/*
		 * We know both input as well as output size, and we know that
		 * the sizes should never be bigger than `uInt_MAX` because
		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
		 * here to instruct zlib to inflate the data in one go, which
		 * is more efficient than using `Z_NO_FLUSH`.
		 */
		err = inflate(block->zstream, Z_FINISH);
		if (err != Z_STREAM_END) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}
		err = 0;

		/* The inflated size must match what the header claimed. */
		if (block->zstream->total_out + block_header_skip != block_size) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		block_source_release_data(&block->block_data);
		block->block_data.data = block->uncompressed_data;
		block->block_data.len = block_size;
		/* On-disk size is header plus however much compressed input
		 * inflate() actually consumed. */
		full_block_size = src_len + block_header_skip - block->zstream->avail_in;
	} else if (full_block_size == 0) {
		full_block_size = block_size;
	} else if (block_size < full_block_size && block_size < block->block_data.len &&
		   block->block_data.data[block_size] != 0) {
		/* If the block is smaller than the full block size, it is
		   padded (data followed by '\0') or the next block is
		   unaligned. */
		full_block_size = block_size;
	}

	/* The block ends with a 2-byte restart count preceded by 3-byte
	 * restart offsets; everything before that is record data. */
	restart_count = reftable_get_be16(block->block_data.data + block_size - 2);
	restart_off = block_size - 2 - 3 * restart_count;

	block->block_type = block_type;
	block->hash_size = hash_size;
	block->restart_off = restart_off;
	block->full_block_size = full_block_size;
	block->header_off = header_size;
	block->restart_count = restart_count;

	err = 0;

done:
	if (err < 0)
		reftable_block_release(block);
	return err;
}

/* Release all resources owned by the block and zero it for reuse. */
void reftable_block_release(struct reftable_block *block)
{
	/* inflateEnd() tolerates a NULL stream pointer. */
	inflateEnd(block->zstream);
	reftable_free(block->zstream);
	reftable_free(block->uncompressed_data);
	block_source_release_data(&block->block_data);
	memset(block, 0, sizeof(*block));
}

/* Return the type byte of a decoded block. */
uint8_t reftable_block_type(const struct reftable_block *b)
{
	return b->block_data.data[b->header_off];
}

/*
 * Decode the key of the block's first record into `key`. The first
 * record is always a restart point, so its key is stored in full.
 * Returns 0 on success or a negative error code.
 */
int reftable_block_first_key(const struct reftable_block *block, struct reftable_buf *key)
{
	int off = block->header_off + 4, n;
	struct string_view in = {
		.buf = block->block_data.data + off,
		.len = block->restart_off - off,
	};
	uint8_t extra = 0;

	reftable_buf_reset(key);

	n = reftable_decode_key(key, &extra, in);
	if (n < 0)
		return n;
	/* Keys must be non-empty; an empty key indicates corruption. */
	if (!key->len)
		return REFTABLE_FORMAT_ERROR;

	return 0;
}

/* Return the record offset stored in the idx-th restart-table entry. */
static uint32_t block_restart_offset(const struct reftable_block *b, size_t idx)
{
	return reftable_get_be24(b->block_data.data + b->restart_off + 3 * idx);
}

/* Bind the iterator to `block` and position it at the first record. */
void block_iter_init(struct block_iter *it, const struct reftable_block *block)
{
	it->block = block;
	block_iter_seek_start(it);
}

/* Reposition the iterator at the block's first record. */
void block_iter_seek_start(struct block_iter *it)
{
	reftable_buf_reset(&it->last_key);
	/* Skip the 4-byte block header. */
	it->next_off = it->block->header_off + 4;
}

/* Arguments for the binsearch() callback below. `error` is set when a
 * restart record cannot be decoded. */
struct restart_needle_less_args {
	int error;
	struct reftable_buf needle;
	const struct reftable_block *block;
};

/*
 * binsearch() callback: return whether the needle key is less than the
 * key stored at restart point `idx`. Sets args->error (and returns -1)
 * on decode failure.
 */
static int restart_needle_less(size_t idx, void *_args)
{
	struct restart_needle_less_args *args = _args;
	uint32_t off = block_restart_offset(args->block, idx);
	struct string_view in = {
		.buf = args->block->block_data.data + off,
		.len = args->block->restart_off - off,
	};
	uint64_t prefix_len, suffix_len;
	uint8_t extra;
	int n;

	/*
	 * Records at restart points are stored without prefix compression, so
	 * there is no need to fully decode the record key here. This removes
	 * the need for allocating memory.
	 */
	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
	if (n < 0 || prefix_len) {
		args->error = 1;
		return -1;
	}

	string_view_consume(&in, n);
	if (suffix_len > in.len) {
		args->error = 1;
		return -1;
	}

	/* Lexicographic compare over the common length, with the shorter
	 * key ordered first on a tie. */
	n = memcmp(args->needle.buf, in.buf,
		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
	if (n)
		return n < 0;
	return args->needle.len < suffix_len;
}

/*
 * Decode the record at the iterator's current position into `rec` and
 * advance. Returns 0 on success, 1 at end of block, or a negative error
 * code.
 */
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
	struct string_view in = {
		.buf = (unsigned char *) it->block->block_data.data + it->next_off,
		.len = it->block->restart_off - it->next_off,
	};
	struct string_view start = in;
	uint8_t extra = 0;
	int n = 0;

	/* Record data ends where the restart table begins. */
	if (it->next_off >= it->block->restart_off)
		return 1;

	/* `last_key` doubles as the prefix source for the key decoder and
	 * is updated in place to the current record's full key. */
	n = reftable_decode_key(&it->last_key, &extra, in);
	if (n < 0)
		return -1;
	if (!it->last_key.len)
		return REFTABLE_FORMAT_ERROR;

	string_view_consume(&in, n);
	n = reftable_record_decode(rec, it->last_key, extra, in, it->block->hash_size,
				   &it->scratch);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	it->next_off += start.len - in.len;
	return 0;
}

/* Detach the iterator from its block and reset its position. */
void block_iter_reset(struct block_iter *it)
{
	reftable_buf_reset(&it->last_key);
	it->next_off = 0;
	it->block = NULL;
}

/* Free the iterator's owned buffers. */
void block_iter_close(struct block_iter *it)
{
	reftable_buf_release(&it->last_key);
	reftable_buf_release(&it->scratch);
}

/*
 * Position the iterator so that the next call to block_iter_next() yields
 * the first record whose key is greater or equal to `want`. Returns 0 on
 * success or a negative error code.
 */
int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want)
{
	struct restart_needle_less_args args = {
		.needle = *want,
		.block = it->block,
	};
	struct reftable_record rec;
	int err = 0;
	size_t i;

	/*
	 * Perform a binary search over the block's restart points, which
	 * avoids doing a linear scan over the whole block. Like this, we
	 * identify the section of the block that should contain our key.
	 *
	 * Note that we explicitly search for the first restart point _greater_
	 * than the sought-after record, not _greater or equal_ to it. In case
	 * the sought-after record is located directly at the restart point we
	 * would otherwise start doing the linear search at the preceding
	 * restart point. While that works alright, we would end up scanning
	 * too many record.
	 */
	i = binsearch(it->block->restart_count, &restart_needle_less, &args);
	if (args.error) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	/*
	 * Now there are multiple cases:
	 *
	 * - `i == 0`: The wanted record is smaller than the record found at
	 *   the first restart point. As the first restart point is the first
	 *   record in the block, our wanted record cannot be located in this
	 *   block at all. We still need to position the iterator so that the
	 *   next call to `block_iter_next()` will yield an end-of-iterator
	 *   signal.
	 *
	 * - `i == restart_count`: The wanted record was not found at any of
	 *   the restart points. As there is no restart point at the end of
	 *   the section the record may thus be contained in the last block.
	 *
	 * - `i > 0`: The wanted record must be contained in the section
	 *   before the found restart point. We thus do a linear search
	 *   starting from the preceding restart point.
	 */
	if (i > 0)
		it->next_off = block_restart_offset(it->block, i - 1);
	else
		it->next_off = it->block->header_off + 4;

	err = reftable_record_init(&rec, reftable_block_type(it->block));
	if (err < 0)
		goto done;

	/*
	 * We're looking for the last entry less than the wanted key so that
	 * the next call to `block_iter_next()` would yield the wanted
	 * record. We thus don't want to position our iterator at the sought
	 * after record, but one before. To do so, we have to go one entry too
	 * far and then back up.
	 */
	while (1) {
		size_t prev_off = it->next_off;

		err = block_iter_next(it, &rec);
		if (err < 0)
			goto done;
		/* End of block: leave the iterator at the last position so
		 * the next call signals end-of-iterator. */
		if (err > 0) {
			it->next_off = prev_off;
			err = 0;
			goto done;
		}

		err = reftable_record_key(&rec, &it->last_key);
		if (err < 0)
			goto done;

		/*
		 * Check whether the current key is greater or equal to the
		 * sought-after key. In case it is greater we know that the
		 * record does not exist in the block and can thus abort early.
		 * In case it is equal to the sought-after key we have found
		 * the desired record.
		 *
		 * Note that we store the next record's key record directly in
		 * `last_key` without restoring the key of the preceding record
		 * in case we need to go one record back. This is safe to do as
		 * `block_iter_next()` would return the ref whose key is equal
		 * to `last_key` now, and naturally all keys share a prefix
		 * with themselves.
		 */
		if (reftable_buf_cmp(&it->last_key, want) >= 0) {
			it->next_off = prev_off;
			goto done;
		}
	}

done:
	reftable_record_release(&rec);
	return err;
}

/*
 * reftable_iterator vtable adapter: seek to the key of `want` after
 * checking the record type matches the block type.
 */
static int block_iter_seek_void(void *it, struct reftable_record *want)
{
	struct reftable_buf buf = REFTABLE_BUF_INIT;
	struct block_iter *bi = it;
	int err;

	if (bi->block->block_type != want->type)
		return REFTABLE_API_ERROR;

	err = reftable_record_key(want, &buf);
	if (err < 0)
		goto out;

	err = block_iter_seek_key(it, &buf);
	if (err < 0)
		goto out;

	err = 0;

out:
	reftable_buf_release(&buf);
	return err;
}

/* reftable_iterator vtable adapter for block_iter_next(). */
static int block_iter_next_void(void *it, struct reftable_record *rec)
{
	return block_iter_next(it, rec);
}

/* reftable_iterator vtable adapter for block_iter_close(). */
static void block_iter_close_void(void *it)
{
	block_iter_close(it);
}

static struct reftable_iterator_vtable block_iter_vtable = {
	.seek = &block_iter_seek_void,
	.next = &block_iter_next_void,
	.close = &block_iter_close_void,
};

/*
 * Set up `it` as a generic iterator over `b`. The returned iterator owns
 * the allocated block_iter and releases it via its close callback.
 * NOTE(review): the REFTABLE_CALLOC_ARRAY() result is not checked for
 * NULL before use here — confirm whether allocation failure is handled
 * by the caller or should return REFTABLE_OUT_OF_MEMORY_ERROR.
 */
int reftable_block_init_iterator(const struct reftable_block *b,
				 struct reftable_iterator *it)
{
	struct block_iter *bi;

	REFTABLE_CALLOC_ARRAY(bi, 1);
	block_iter_init(bi, b);

	assert(!it->ops);
	it->iter_arg = bi;
	it->ops = &block_iter_vtable;

	return 0;
}

/* Free all resources owned by the writer; the block buffer itself is
 * caller-owned and left alone. */
void block_writer_release(struct block_writer *bw)
{
	deflateEnd(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->restarts);
	REFTABLE_FREE_AND_NULL(bw->compressed);
	reftable_buf_release(&bw->scratch);
	reftable_buf_release(&bw->last_key);
	/* the block is not owned. */
}