/* Git fork */
1/*
2 * Copyright 2020 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style
5 * license that can be found in the LICENSE file or at
6 * https://developers.google.com/open-source/licenses/bsd
7 */
8
9#include "block.h"
10
11#include "blocksource.h"
12#include "constants.h"
13#include "iter.h"
14#include "record.h"
15#include "reftable-error.h"
16#include "system.h"
17
/*
 * Return the on-disk size in bytes of the reftable file header for the
 * given format version. Unknown versions are a programming error and
 * abort the process.
 */
size_t header_size(int version)
{
	if (version == 1)
		return 24;
	if (version == 2)
		return 28;
	abort();
}
28
/*
 * Return the on-disk size in bytes of the reftable file footer for the
 * given format version. Unknown versions are a programming error and
 * abort the process.
 */
size_t footer_size(int version)
{
	if (version == 1)
		return 68;
	if (version == 2)
		return 72;
	abort();
}
39
/*
 * Commit a record of `n` encoded bytes that has just been written at the
 * current position. If `is_restart` is set (and the restart table is not
 * yet full), the current offset is recorded as a new restart point, and
 * `key` becomes the new prefix-compression base stored in `last_key`.
 *
 * Returns 0 on success, REFTABLE_ENTRY_TOO_BIG_ERROR if the record plus
 * the (possibly grown) restart table would not fit into the block, or
 * another negative error code on failure.
 */
static int block_writer_register_restart(struct block_writer *w, int n,
					 int is_restart, struct reftable_buf *key)
{
	uint32_t rlen;
	int err;

	rlen = w->restart_len;
	/*
	 * The restart count is serialized into a 16-bit field by
	 * block_writer_finish(), so silently stop adding restart points
	 * once the table is full.
	 */
	if (rlen >= MAX_RESTARTS)
		is_restart = 0;

	if (is_restart)
		rlen++;
	/*
	 * Reserve space for the trailer: 2 bytes for the restart count plus
	 * 3 bytes per restart offset.
	 */
	if (2 + 3 * rlen + n > w->block_size - w->next)
		return REFTABLE_ENTRY_TOO_BIG_ERROR;
	if (is_restart) {
		REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1,
					    w->restart_cap);
		if (!w->restarts)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		w->restarts[w->restart_len++] = w->next;
	}

	w->next += n;

	/* Remember the full key so the next record can be prefix-compressed. */
	reftable_buf_reset(&w->last_key);
	err = reftable_buf_add(&w->last_key, key->buf, key->len);
	if (err < 0)
		return err;

	w->entries++;
	return 0;
}
72
73int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
74 uint32_t block_size, uint32_t header_off, uint32_t hash_size)
75{
76 bw->block = block;
77 bw->hash_size = hash_size;
78 bw->block_size = block_size;
79 bw->header_off = header_off;
80 bw->block[header_off] = typ;
81 bw->next = header_off + 4;
82 bw->restart_interval = 16;
83 bw->entries = 0;
84 bw->restart_len = 0;
85 bw->last_key.len = 0;
86 if (!bw->zstream) {
87 REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
88 if (!bw->zstream)
89 return REFTABLE_OUT_OF_MEMORY_ERROR;
90 deflateInit(bw->zstream, 9);
91 }
92
93 return 0;
94}
95
96uint8_t block_writer_type(struct block_writer *bw)
97{
98 return bw->block[bw->header_off];
99}
100
101/*
102 * Adds the reftable_record to the block. Returns 0 on success and
103 * appropriate error codes on failure.
104 */
105int block_writer_add(struct block_writer *w, struct reftable_record *rec)
106{
107 struct reftable_buf empty = REFTABLE_BUF_INIT;
108 struct reftable_buf last =
109 w->entries % w->restart_interval == 0 ? empty : w->last_key;
110 struct string_view out = {
111 .buf = w->block + w->next,
112 .len = w->block_size - w->next,
113 };
114 struct string_view start = out;
115 int is_restart = 0;
116 int n = 0;
117 int err;
118
119 err = reftable_record_key(rec, &w->scratch);
120 if (err < 0)
121 goto done;
122
123 if (!w->scratch.len) {
124 err = REFTABLE_API_ERROR;
125 goto done;
126 }
127
128 n = reftable_encode_key(&is_restart, out, last, w->scratch,
129 reftable_record_val_type(rec));
130 if (n < 0) {
131 err = n;
132 goto done;
133 }
134 string_view_consume(&out, n);
135
136 n = reftable_record_encode(rec, out, w->hash_size);
137 if (n < 0) {
138 err = n;
139 goto done;
140 }
141 string_view_consume(&out, n);
142
143 err = block_writer_register_restart(w, start.len - out.len, is_restart,
144 &w->scratch);
145done:
146 return err;
147}
148
/*
 * Finalize the block: serialize the trailing restart table and, for log
 * blocks, compress the payload in place. Returns the number of bytes
 * used in the block on success or a negative error code on failure.
 */
int block_writer_finish(struct block_writer *w)
{
	/* Append the 3-byte restart offsets after the last record. */
	for (uint32_t i = 0; i < w->restart_len; i++) {
		reftable_put_be24(w->block + w->next, w->restarts[i]);
		w->next += 3;
	}

	/* ... followed by the 2-byte restart count. */
	reftable_put_be16(w->block + w->next, w->restart_len);
	w->next += 2;
	/* Patch the final block length into the header, after the type byte. */
	reftable_put_be24(w->block + 1 + w->header_off, w->next);

	/*
	 * Log records are stored zlib-compressed. Note that the compression
	 * also spans over the restart points we have just written.
	 */
	if (block_writer_type(w) == REFTABLE_BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip, compressed_len;
		int ret;

		ret = deflateReset(w->zstream);
		if (ret != Z_OK)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Precompute the upper bound of how many bytes the compressed
		 * data may end up with. Combined with `Z_FINISH`, `deflate()`
		 * is guaranteed to return `Z_STREAM_END`.
		 */
		compressed_len = deflateBound(w->zstream, src_len);
		REFTABLE_ALLOC_GROW_OR_NULL(w->compressed, compressed_len,
					    w->compressed_cap);
		if (!w->compressed) {
			ret = REFTABLE_OUT_OF_MEMORY_ERROR;
			return ret;
		}

		w->zstream->next_out = w->compressed;
		w->zstream->avail_out = compressed_len;
		w->zstream->next_in = w->block + block_header_skip;
		w->zstream->avail_in = src_len;

		/*
		 * We want to perform all compression in a single step, which
		 * is why we can pass Z_FINISH here. As we have precomputed the
		 * deflated buffer's size via `deflateBound()` this function is
		 * guaranteed to succeed according to the zlib documentation.
		 */
		ret = deflate(w->zstream, Z_FINISH);
		if (ret != Z_STREAM_END)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Overwrite the uncompressed data we have already written and
		 * adjust the `next` pointer to point right after the
		 * compressed data.
		 */
		memcpy(w->block + block_header_skip, w->compressed,
		       w->zstream->total_out);
		w->next = w->zstream->total_out + block_header_skip;
	}

	return w->next;
}
213
214static int read_block(struct reftable_block_source *source,
215 struct reftable_block_data *dest, uint64_t off,
216 uint32_t sz)
217{
218 size_t size = block_source_size(source);
219 block_source_release_data(dest);
220 if (off >= size)
221 return 0;
222 if (off + sz > size)
223 sz = size - off;
224 return block_source_read_data(source, dest, off, sz);
225}
226
/*
 * Read and decode the block at `offset` from `source`. For log blocks
 * the zlib-compressed payload is inflated into an internal buffer, which
 * then replaces the raw data. Returns 0 on success, a positive value
 * when the block's type does not match `want_type`, or a negative error
 * code on failure (in which case the block is released).
 */
int reftable_block_init(struct reftable_block *block,
			struct reftable_block_source *source,
			uint32_t offset, uint32_t header_size,
			uint32_t table_block_size, uint32_t hash_size,
			uint8_t want_type)
{
	uint32_t guess_block_size = table_block_size ?
		table_block_size : DEFAULT_BLOCK_SIZE;
	uint32_t full_block_size = table_block_size;
	uint16_t restart_count;
	uint32_t restart_off;
	uint32_t block_size;
	uint8_t block_type;
	int err;

	err = read_block(source, &block->block_data, offset, guess_block_size);
	if (err < 0)
		goto done;

	/* The first byte after the (table) header is the block type. */
	block_type = block->block_data.data[header_size];
	if (!reftable_is_block_type(block_type)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}
	if (want_type != REFTABLE_BLOCK_TYPE_ANY && block_type != want_type) {
		err = 1;
		goto done;
	}

	/*
	 * The 3 bytes after the type byte encode the block size. Re-read the
	 * block in case our initial guess did not cover it completely.
	 */
	block_size = reftable_get_be24(block->block_data.data + header_size + 1);
	if (block_size > guess_block_size) {
		err = read_block(source, &block->block_data, offset, block_size);
		if (err < 0)
			goto done;
	}

	if (block_type == REFTABLE_BLOCK_TYPE_LOG) {
		uint32_t block_header_skip = 4 + header_size;
		uLong dst_len = block_size - block_header_skip;
		uLong src_len = block->block_data.len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_GROW_OR_NULL(block->uncompressed_data, block_size,
					    block->uncompressed_cap);
		if (!block->uncompressed_data) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto done;
		}

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(block->uncompressed_data, block->block_data.data, block_header_skip);

		/* Allocate the zlib stream lazily; reuse it on later inits. */
		if (!block->zstream) {
			REFTABLE_CALLOC_ARRAY(block->zstream, 1);
			if (!block->zstream) {
				err = REFTABLE_OUT_OF_MEMORY_ERROR;
				goto done;
			}

			err = inflateInit(block->zstream);
		} else {
			err = inflateReset(block->zstream);
		}
		if (err != Z_OK) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		block->zstream->next_in = block->block_data.data + block_header_skip;
		block->zstream->avail_in = src_len;
		block->zstream->next_out = block->uncompressed_data + block_header_skip;
		block->zstream->avail_out = dst_len;

		/*
		 * We know both input as well as output size, and we know that
		 * the sizes should never be bigger than `uInt_MAX` because
		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
		 * here to instruct zlib to inflate the data in one go, which
		 * is more efficient than using `Z_NO_FLUSH`.
		 */
		err = inflate(block->zstream, Z_FINISH);
		if (err != Z_STREAM_END) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}
		err = 0;

		/* The inflated data must fill the advertised size exactly. */
		if (block->zstream->total_out + block_header_skip != block_size) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		block_source_release_data(&block->block_data);
		block->block_data.data = block->uncompressed_data;
		block->block_data.len = block_size;
		/* On disk, the block only spans the compressed bytes consumed. */
		full_block_size = src_len + block_header_skip - block->zstream->avail_in;
	} else if (full_block_size == 0) {
		full_block_size = block_size;
	} else if (block_size < full_block_size && block_size < block->block_data.len &&
		   block->block_data.data[block_size] != 0) {
		/* If the block is smaller than the full block size, it is
		   padded (data followed by '\0') or the next block is
		   unaligned. */
		full_block_size = block_size;
	}

	/*
	 * The restart table sits at the block's tail: a 2-byte count
	 * preceded by 3-byte restart offsets.
	 */
	restart_count = reftable_get_be16(block->block_data.data + block_size - 2);
	restart_off = block_size - 2 - 3 * restart_count;

	block->block_type = block_type;
	block->hash_size = hash_size;
	block->restart_off = restart_off;
	block->full_block_size = full_block_size;
	block->header_off = header_size;
	block->restart_count = restart_count;

	err = 0;

done:
	if (err < 0)
		reftable_block_release(block);
	return err;
}
351
352void reftable_block_release(struct reftable_block *block)
353{
354 inflateEnd(block->zstream);
355 reftable_free(block->zstream);
356 reftable_free(block->uncompressed_data);
357 block_source_release_data(&block->block_data);
358 memset(block, 0, sizeof(*block));
359}
360
361uint8_t reftable_block_type(const struct reftable_block *b)
362{
363 return b->block_data.data[b->header_off];
364}
365
366int reftable_block_first_key(const struct reftable_block *block, struct reftable_buf *key)
367{
368 int off = block->header_off + 4, n;
369 struct string_view in = {
370 .buf = block->block_data.data + off,
371 .len = block->restart_off - off,
372 };
373 uint8_t extra = 0;
374
375 reftable_buf_reset(key);
376
377 n = reftable_decode_key(key, &extra, in);
378 if (n < 0)
379 return n;
380 if (!key->len)
381 return REFTABLE_FORMAT_ERROR;
382
383 return 0;
384}
385
386static uint32_t block_restart_offset(const struct reftable_block *b, size_t idx)
387{
388 return reftable_get_be24(b->block_data.data + b->restart_off + 3 * idx);
389}
390
/*
 * Bind the iterator to the given block and position it at the first
 * record. The block must stay valid for the iterator's lifetime.
 */
void block_iter_init(struct block_iter *it, const struct reftable_block *block)
{
	it->block = block;
	block_iter_seek_start(it);
}
396
397void block_iter_seek_start(struct block_iter *it)
398{
399 reftable_buf_reset(&it->last_key);
400 it->next_off = it->block->header_off + 4;
401}
402
/* Context passed through binsearch() to restart_needle_less(). */
struct restart_needle_less_args {
	int error;	/* set to 1 when a restart record fails to decode */
	struct reftable_buf needle;	/* the key being searched for */
	const struct reftable_block *block;
};

/*
 * binsearch() predicate: returns nonzero when the needle sorts before
 * the key stored at restart point `idx`. Decoding failures set
 * `args->error` and also return nonzero so the search terminates.
 */
static int restart_needle_less(size_t idx, void *_args)
{
	struct restart_needle_less_args *args = _args;
	uint32_t off = block_restart_offset(args->block, idx);
	struct string_view in = {
		.buf = args->block->block_data.data + off,
		.len = args->block->restart_off - off,
	};
	uint64_t prefix_len, suffix_len;
	uint8_t extra;
	int n;

	/*
	 * Records at restart points are stored without prefix compression, so
	 * there is no need to fully decode the record key here. This removes
	 * the need for allocating memory.
	 */
	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
	if (n < 0 || prefix_len) {
		args->error = 1;
		return -1;
	}

	string_view_consume(&in, n);
	/* A suffix longer than the remaining data indicates corruption. */
	if (suffix_len > in.len) {
		args->error = 1;
		return -1;
	}

	/*
	 * Compare the shared-length prefix of needle and restart key; when
	 * they tie, the shorter key (the needle) sorts first.
	 */
	n = memcmp(args->needle.buf, in.buf,
		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
	if (n)
		return n < 0;
	return args->needle.len < suffix_len;
}
444
/*
 * Decode the record at the iterator's current position into `rec` and
 * advance past it. Returns 0 on success, 1 when the end of the record
 * section is reached, and a negative error code on failure.
 */
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
	struct string_view in = {
		.buf = (unsigned char *) it->block->block_data.data + it->next_off,
		.len = it->block->restart_off - it->next_off,
	};
	struct string_view start = in;
	uint8_t extra = 0;
	int n = 0;

	/* The record section ends where the restart table begins. */
	if (it->next_off >= it->block->restart_off)
		return 1;

	/*
	 * Decode the key. `last_key` serves both as the prefix-compression
	 * base for this record and as the store for the decoded full key.
	 */
	n = reftable_decode_key(&it->last_key, &extra, in);
	if (n < 0)
		return -1;
	if (!it->last_key.len)
		return REFTABLE_FORMAT_ERROR;

	string_view_consume(&in, n);
	n = reftable_record_decode(rec, it->last_key, extra, in, it->block->hash_size,
				   &it->scratch);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	/* Advance by the total number of bytes consumed. */
	it->next_off += start.len - in.len;
	return 0;
}
474
475void block_iter_reset(struct block_iter *it)
476{
477 reftable_buf_reset(&it->last_key);
478 it->next_off = 0;
479 it->block = NULL;
480}
481
482void block_iter_close(struct block_iter *it)
483{
484 reftable_buf_release(&it->last_key);
485 reftable_buf_release(&it->scratch);
486}
487
488int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want)
489{
490 struct restart_needle_less_args args = {
491 .needle = *want,
492 .block = it->block,
493 };
494 struct reftable_record rec;
495 int err = 0;
496 size_t i;
497
498 /*
499 * Perform a binary search over the block's restart points, which
500 * avoids doing a linear scan over the whole block. Like this, we
501 * identify the section of the block that should contain our key.
502 *
503 * Note that we explicitly search for the first restart point _greater_
504 * than the sought-after record, not _greater or equal_ to it. In case
505 * the sought-after record is located directly at the restart point we
506 * would otherwise start doing the linear search at the preceding
507 * restart point. While that works alright, we would end up scanning
508 * too many record.
509 */
510 i = binsearch(it->block->restart_count, &restart_needle_less, &args);
511 if (args.error) {
512 err = REFTABLE_FORMAT_ERROR;
513 goto done;
514 }
515
516 /*
517 * Now there are multiple cases:
518 *
519 * - `i == 0`: The wanted record is smaller than the record found at
520 * the first restart point. As the first restart point is the first
521 * record in the block, our wanted record cannot be located in this
522 * block at all. We still need to position the iterator so that the
523 * next call to `block_iter_next()` will yield an end-of-iterator
524 * signal.
525 *
526 * - `i == restart_count`: The wanted record was not found at any of
527 * the restart points. As there is no restart point at the end of
528 * the section the record may thus be contained in the last block.
529 *
530 * - `i > 0`: The wanted record must be contained in the section
531 * before the found restart point. We thus do a linear search
532 * starting from the preceding restart point.
533 */
534 if (i > 0)
535 it->next_off = block_restart_offset(it->block, i - 1);
536 else
537 it->next_off = it->block->header_off + 4;
538
539 err = reftable_record_init(&rec, reftable_block_type(it->block));
540 if (err < 0)
541 goto done;
542
543 /*
544 * We're looking for the last entry less than the wanted key so that
545 * the next call to `block_reader_next()` would yield the wanted
546 * record. We thus don't want to position our iterator at the sought
547 * after record, but one before. To do so, we have to go one entry too
548 * far and then back up.
549 */
550 while (1) {
551 size_t prev_off = it->next_off;
552
553 err = block_iter_next(it, &rec);
554 if (err < 0)
555 goto done;
556 if (err > 0) {
557 it->next_off = prev_off;
558 err = 0;
559 goto done;
560 }
561
562 err = reftable_record_key(&rec, &it->last_key);
563 if (err < 0)
564 goto done;
565
566 /*
567 * Check whether the current key is greater or equal to the
568 * sought-after key. In case it is greater we know that the
569 * record does not exist in the block and can thus abort early.
570 * In case it is equal to the sought-after key we have found
571 * the desired record.
572 *
573 * Note that we store the next record's key record directly in
574 * `last_key` without restoring the key of the preceding record
575 * in case we need to go one record back. This is safe to do as
576 * `block_iter_next()` would return the ref whose key is equal
577 * to `last_key` now, and naturally all keys share a prefix
578 * with themselves.
579 */
580 if (reftable_buf_cmp(&it->last_key, want) >= 0) {
581 it->next_off = prev_off;
582 goto done;
583 }
584 }
585
586done:
587 reftable_record_release(&rec);
588 return err;
589}
590
591static int block_iter_seek_void(void *it, struct reftable_record *want)
592{
593 struct reftable_buf buf = REFTABLE_BUF_INIT;
594 struct block_iter *bi = it;
595 int err;
596
597 if (bi->block->block_type != want->type)
598 return REFTABLE_API_ERROR;
599
600 err = reftable_record_key(want, &buf);
601 if (err < 0)
602 goto out;
603
604 err = block_iter_seek_key(it, &buf);
605 if (err < 0)
606 goto out;
607
608 err = 0;
609
610out:
611 reftable_buf_release(&buf);
612 return err;
613}
614
/* Iterator vtable adapter: advance via the generic iterator interface. */
static int block_iter_next_void(void *it, struct reftable_record *rec)
{
	return block_iter_next(it, rec);
}

/* Iterator vtable adapter: release iterator-owned resources. */
static void block_iter_close_void(void *it)
{
	block_iter_close(it);
}

/* Generic iterator operations backed by a `struct block_iter`. */
static struct reftable_iterator_vtable block_iter_vtable = {
	.seek = &block_iter_seek_void,
	.next = &block_iter_next_void,
	.close = &block_iter_close_void,
};
630
631int reftable_block_init_iterator(const struct reftable_block *b,
632 struct reftable_iterator *it)
633{
634 struct block_iter *bi;
635
636 REFTABLE_CALLOC_ARRAY(bi, 1);
637 block_iter_init(bi, b);
638
639 assert(!it->ops);
640 it->iter_arg = bi;
641 it->ops = &block_iter_vtable;
642
643 return 0;
644}
645
646void block_writer_release(struct block_writer *bw)
647{
648 deflateEnd(bw->zstream);
649 REFTABLE_FREE_AND_NULL(bw->zstream);
650 REFTABLE_FREE_AND_NULL(bw->restarts);
651 REFTABLE_FREE_AND_NULL(bw->compressed);
652 reftable_buf_release(&bw->scratch);
653 reftable_buf_release(&bw->last_key);
654 /* the block is not owned. */
655}