Absolute hinky bare-bones implementation of multiformats in Perl

Merge branch 'f-various-fixes-001' into 'main'

added a readme, and *bwerk* documentation

See merge request blockstackers/multiformats!2

+320 -11
+9
README.md
··· 1 + ## Multiformats 2 + 3 + A really, really bare-bones implementation of the Multiformats protocols as found on https://multiformats.io/ 4 + 5 + Consider this entire package experimental; things **will** break. 6 + 7 + ## GitHub is not the primary repository 8 + 9 + If you're reading this on GitHub, keep in mind that this package is not developed here; the only code that lives here is the main branch, put here to allow others to access it more easily. Development takes place on my personal GitLab instance, which should also explain the `.gitlab-ci.yml` file ;)
+49
lib/Multiformats.pm
··· 8 8 # this package exists purely as a little placeholder for various abstracts and versions; as well as some 9 9 # of the documentation 10 10 11 + =pod 12 + 13 + =head1 NAME 14 + 15 + Multiformats - A bare-bones multiformat implementation 16 + 17 + =head1 SYNOPSIS 18 + 19 + use Multiformats::Multibase qw/multibase_encode/; 20 + 21 + my $encoded = multibase_encode('base32upper', "my data that I would like encoded"); 22 + 23 + my $multibase = Multiformats::Multibase->new; 24 + my $also_encoded = $multibase->encode('base32', "even more data I want encoded"); 25 + 26 + =head1 FUNCTIONAL/OO 27 + 28 + Every module can be used either in an OO fashion or in a purely functional one. Please see the various modules that make up this package for more details. 29 + 30 + =head1 CODEC SUPPORT 31 + 32 + Not all codecs/hash functions/base encoders are supported; you will find out quickly if they aren't, because the various encode/decode functions will die when asked to encode or decode something with an unknown codec. CID only supports CIDv1; CIDv0 is not supported I<at all>. 33 + 34 + =head1 SEE ALSO 35 + 36 + =over 37 + 38 + =item * L<Multiformats::CID> - CID handling 39 + 40 + =item * L<Multiformats::Multibase> - Multibase encoding/decoding 41 + 42 + =item * L<Multiformats::Multihash> - Multihash encoding/unwrapping 43 + 44 + =item * L<Multiformats::Multicodec> - Multicodec wrapping/unwrapping 45 + 46 + =item * L<Multiformats::Varint> - Varint encoding/decoding 47 + 48 + =back 49 + 50 + =head1 AUTHOR 51 + 52 + Ben van Staveren <madcat@cpan.org>, <ben@blockstackers.net> 53 + 54 + =head1 LICENSE 55 + 56 + This package is licensed under the same terms as Perl itself. 57 + 58 + =cut 59 + 11 60 1;
+55 -5
lib/Multiformats/CID.pm
··· 33 33 my $mc = Multiformats::Multicodec::_get_by_tag($mc_codec); 34 34 35 35 # not sure what that codec tag does in here because it doesn't appear to do 36 - # anything short of encoding, well, nothing - the remaining data is the multihash 36 + # anything - it's not encoding the remainder, so... what's it do Frank?! 37 37 my ($mh, $hash) = multihash_unwrap(substr($bytes, $bread + $bread_codec)); 38 - 39 38 return Multiformats::CID::CIDv1->new(version => 1, codec => $mc->[0], hash_function => $mh->[0], hash => $hash); 40 39 } 41 40 } ··· 43 42 package 44 43 Multiformats::CID::CIDv1 { 45 44 use Mojo::Base -base, -signatures; 46 - use Multiformats::Multicodec qw/multicodec_wrap multicodec_unwrap/; 45 + use Multiformats::Multicodec qw/multicodec_wrap/; 47 46 use Multiformats::Multibase qw/multibase_encode/; 48 47 use Multiformats::Varint qw/varint_encode/; 49 48 use Multiformats::Multihash qw/multihash_wrap/; 50 49 use overload bool => sub {1}, '""' => sub { shift->to_str }, fallback => 1; 51 50 52 - # note that the codecs are the tag values, not the names, we need to take this into account 53 - # in multibase_encode and multihash_encode 54 51 has [qw/version codec hash_function hash/] => undef; 55 52 56 53 sub to_str($self, $codec = 'base32') { ··· 64 61 return $version . $content; 65 62 } 66 63 } 64 + 65 + =pod 66 + 67 + =head1 NAME 68 + 69 + Multiformats::CID - CID handling 70 + 71 + =head1 SYNOPSIS 72 + 73 + use Multiformats::CID qw/cid/; 74 + 75 + # can use either the stringified representation or the raw binary representation 76 + my $cid = cid('bafyreigngt2aslhuh7jbgpuliep4v4uvlantdmew2ojr7u3upknttpvqxa'); 77 + 78 + =head1 FUNCTIONS 79 + 80 + =head2 cid(...) 81 + 82 + When given a string representation or binary representation of a CID will decode the version, codec, hash function, and hash value used and will return those wrapped in a C<Multiformats::CID::CIDv1> object. 
83 + 84 + =head1 CIDv1 Object 85 + 86 + This object wraps a CID and has the following attributes and methods. 87 + 88 + =head2 ATTRIBUTES 89 + 90 + =head3 version 91 + 92 + Returns the version of the CID (always 1) 93 + 94 + =head3 codec 95 + 96 + Returns the name of the multicodec content codec (e.g. C<dag-cbor>) 97 + 98 + =head3 hash_function 99 + 100 + Returns the name of the hash function used 101 + 102 + =head3 hash 103 + 104 + Returns the binary hash (obtained via the hash function) 105 + 106 + =head2 METHODS 107 + 108 + =head3 to_str() 109 + 110 + Returns the stringified version of the CID. The CID object itself is overloaded to return this when used in string context. 111 + 112 + =head3 to_bytes() 113 + 114 + Returns the binary representation of the CID. 115 + 116 + =cut 67 117 68 118 1; 69 119
+39
lib/Multiformats/Multibase.pm
··· 74 74 } 75 75 } 76 76 77 + =pod 78 + 79 + =head1 NAME 80 + 81 + Multiformats::Multibase - Multibase decoding and encoding 82 + 83 + =head1 SYNOPSIS 84 + 85 + use Multiformats::Multibase qw/multibase_encode multibase_decode/; 86 + 87 + my $encoded = multibase_encode('base32', 'this will be base32 encoded'); 88 + my $decoded = multibase_decode($encoded); 89 + 90 + =head1 FUNCTIONS 91 + 92 + =head2 multibase_encode($base, $data_to_encode) 93 + 94 + Encodes the given data with the given base. See below for supported bases. 95 + 96 + =head2 multibase_decode($encoded_data) 97 + 98 + Decodes the given data. When called in scalar context returns the decoded data. When called in list context returns a list containing the multibase tag and the decoded data. 99 + 100 + =head1 SUPPORTED BASES 101 + 102 + =over 103 + 104 + =item * base32 105 + 106 + =item * base32upper 107 + 108 + =item * base36 109 + 110 + =item * base58btc 111 + 112 + =back 113 + 114 + =cut 115 + 77 116 1;
+63
lib/Multiformats/Multicodec.pm
··· 13 13 [ 'dag-cbor', 0x71 ], 14 14 ]; 15 15 16 + sub wrap($self, $as, $value) { 17 + return multicodec_wrap($as, $value); 18 + } 19 + 20 + sub unwrap($self, $value) { 21 + return multicodec_unwrap($value); 22 + } 23 + 24 + sub get_codec($self, $value) { 25 + return multicodec_get_codec($value); 26 + } 27 + 28 + sub new($pkg) { 29 + return bless({}, $pkg); 30 + } 31 + 16 32 sub _get_by_name($as) { 17 33 foreach my $entry (@{__PACKAGE__->MULTICODEC_MAP}) { 18 34 return $entry if($entry->[0] eq $as); ··· 54 70 } 55 71 } 56 72 } 73 + 74 + =pod 75 + 76 + =head1 NAME 77 + 78 + Multiformats::Multicodec - Multicodec encoding/decoding/wrapping 79 + 80 + =head1 SYNOPSIS 81 + 82 + use Multiformats::Multicodec qw/multicodec_get_codec multicodec_unwrap multicodec_wrap/; 83 + 84 + my $data = '...'; 85 + 86 + my $encoded = multicodec_wrap('dag-cbor', $data); 87 + my $decoded = multicodec_unwrap($encoded); 88 + 89 + my $codec = multicodec_get_codec($encoded); 90 + 91 + $codec->[0]; # dag-cbor 92 + $codec->[1]; # 0x71 93 + 94 + 95 + =head1 FUNCTIONS 96 + 97 + =head2 multicodec_wrap($codec, $data) 98 + 99 + Wraps the given data with the proper multicodec tag. 100 + 101 + =head2 multicodec_unwrap($data) 102 + 103 + Unwraps the given data and returns the original raw data. 104 + 105 + =head2 multicodec_get_codec($data) 106 + 107 + Returns an arrayref containing the codec information that the data is encoded with. First item in the arrayref is the codec name, second item is the codec tag value. 108 + 109 + =head1 SUPPORTED CODECS 110 + 111 + =over 112 + 113 + =item * raw 114 + 115 + =item * dag-cbor 116 + 117 + =back 118 + 119 + =cut 57 120 58 121 1;
+73 -5
lib/Multiformats/Multihash.pm
··· 19 19 return multihash_encode($as, $value); 20 20 } 21 21 22 + sub wrap($self, $as, $value) { 23 + return multihash_wrap($as, $value); 24 + } 25 + 26 + sub unwrap($self, $value) { 27 + return multihash_unwrap($value); 28 + } 29 + 22 30 sub new($pkg) { 23 31 return bless({}, $pkg); 24 32 } ··· 35 43 [ 'sha2-384', 0x20, undef, sub { return sha_384(shift) } ], 36 44 ]; 37 45 38 - sub codecs { 39 - return __PACKAGE__->MULTIFORMAT_MAP; 40 - } 41 - 42 46 sub _map_by_tag($tag) { 43 47 foreach my $entry (@{__PACKAGE__->MULTIFORMAT_MAP}) { 44 48 return $entry if($entry->[1] == $tag); ··· 73 77 if(my $e = _map_by_tag($t)) { 74 78 my ($l, $bread_len) = varint_decode_raw(substr($bytes, $bread_type)); 75 79 return wantarray 76 - ? ($e, substr($bytes, $bread_type + $bread_len)) # allows us to get the whole kit and kaboodle in one sitting 80 + ? ([$e->[0], $e->[1] ], substr($bytes, $bread_type + $bread_len)) # allows us to get the whole kit and kaboodle in one sitting 77 81 : substr($bytes, $bread_type + $bread_len) 78 82 } else { 79 83 die 'unknown format ' . $t . 
', '; ··· 99 103 } 100 104 } 101 105 } 106 + 107 + =pod 108 + 109 + =head1 NAME 110 + 111 + Multiformats::Multihash - Multihash encoding/decoding/wrapping 112 + 113 + =head1 SYNOPSIS 114 + 115 + use Multiformats::Multihash qw/multihash_encode multihash_decode multihash_unwrap multihash_wrap/; 116 + my $data = '...'; 117 + 118 + my $encoded = multihash_encode('sha2-256', $data); 119 + 120 + my $hash = Digest::SHA::sha256($data); 121 + 122 + my $encoded_also = multihash_wrap('sha2-256', $hash); 123 + 124 + =head1 FUNCTIONS 125 + 126 + =head2 multihash_encode($hash_function, $data) 127 + 128 + Hashes the data with the given hash function, and encodes the result into a Multihash. 129 + 130 + =head2 multihash_decode($data) 131 + 132 + Parses the used hash function and hash length for validity, and returns the original raw hash. 133 + 134 + =head2 multihash_unwrap($data) 135 + 136 + Acts similarly to C<multihash_decode> when called in scalar context, but when called in list context returns a list containing the encoding arrayref and the raw hash. The encoding arrayref has the hash function name as its first element, and the hash function tag value as its second element. 137 + 138 + my ($encoding, $raw_hash) = multihash_unwrap($data); 139 + 140 + $encoding->[0]; # e.g. 'sha2-256' 141 + $encoding->[1]; # e.g. 0x12 142 + 143 + =head2 multihash_wrap($hash_function, $data) 144 + 145 + Acts similarly to C<multihash_encode> but assumes the data passed is already a raw hash, so does not digest it before encoding to a Multihash. 146 + 147 + =head1 SUPPORTED HASHES 148 + 149 + =over 150 + 151 + =item * identity 152 + 153 + =item * sha1 154 + 155 + =item * sha2-256 156 + 157 + =item * sha2-512 158 + 159 + =item * sha3-384 160 + 161 + =item * sha3-256 162 + 163 + =item * sha3-224 164 + 165 + =item * sha2-384 166 + 167 + =back 168 + 169 + =cut 102 170 103 171 1;
+32 -1
lib/Multiformats/Varint.pm
··· 46 46 47 47 sub varint_decode($value) { 48 48 my ($x, $read) = varint_decode_raw($value); 49 - die 'Multiformats::Varint::varint_decode: not all bytes used by encoding' if($read > length($value)); 49 + die 'Multiformats::Varint::varint_decode: not all bytes used by encoding' if($read < length($value)); 50 50 return $x; 51 51 } 52 52 ··· 71 71 : $x; 72 72 } 73 73 } 74 + 75 + =pod 76 + 77 + =head1 NAME 78 + 79 + Multiformats::Varint - Varint decoding and encoding 80 + 81 + =head1 SYNOPSIS 82 + 83 + use Multiformats::Varint qw/varint_encode varint_decode/; 84 + 85 + my $encoded = varint_encode(300); # \xAC\x02 86 + my $decoded = varint_decode("\xAC\x02"); # 300 87 + 88 + =head1 FUNCTIONS 89 + 90 + =head2 varint_encode(...) 91 + 92 + Encodes the given unsigned integer number to an unsigned Varint; returns a byte string. Will die if the varint is larger than the spec allows (>9 bytes). 93 + 94 + =head2 varint_decode(...) 95 + 96 + Decodes the given byte string to an unsigned integer. Will die if there are more bytes passed than required to decode a Varint. 97 + 98 + =head2 varint_decode_raw(...) 99 + 100 + Like varint_decode, but will not die when there are bytes left in the input. 101 + 102 + When called in scalar context will return the decoded unsigned integer, when called in list context will return a list containing the unsigned integer, and the number of bytes used from the input. Does not alter the input value, so you will have to use C<substr> or some other mechanism to strip the used bytes out of the input value. 103 + 104 + =cut 74 105 75 106 1;