3 * hashing, Merkle hash trees and data integrity
5 * Created by Victor Grishchenko on 3/6/09.
6 * Copyright 2009 Delft University of Technology. All rights reserved.
9 #ifndef SWIFT_SHA1_HASH_TREE_H
10 #define SWIFT_SHA1_HASH_TREE_H
19 /** SHA-1 hash, 20 bytes of data */
23 Sha1Hash() { memset(bits,0,20); }
24 /** Make a hash of two hashes (for building Merkle hash trees). */
25 Sha1Hash(const Sha1Hash& left, const Sha1Hash& right);
26 /** Hash an old plain string. */
27 Sha1Hash(const char* str, size_t length=-1);
28 Sha1Hash(const uint8_t* data, size_t length);
29 /** Either parse hash from hex representation of read in raw format. */
30 Sha1Hash(bool hex, const char* hash);
32 std::string hex() const;
33 bool operator == (const Sha1Hash& b) const
34 { return 0==memcmp(bits,b.bits,SIZE); }
35 bool operator != (const Sha1Hash& b) const { return !(*this==b); }
36 const char* operator * () const { return (char*) bits; }
38 const static Sha1Hash ZERO;
39 const static size_t SIZE;
43 /** This class controls data integrity of some file; hash tree is put to
44 an auxilliary file next to it. The hash tree file is mmap'd for
45 performance reasons. Actually, I'd like the data file itself to be
46 mmap'd, but 32-bit platforms do not allow that for bigger files.
48 There are two variants of the general workflow: either a HashTree
49 is initialized with a root hash and the rest of hashes and data is
50 spoon-fed later, OR a HashTree is initialized with a data file, so
51 the hash tree is derived, including the root hash.
55 /** Merkle hash tree: root */
58 /** Merkle hash tree: peak hashes */
59 Sha1Hash peak_hashes_[64];
62 /** File descriptor to put hashes to */
65 /** Whether to re-hash files. */
67 /** Base size, as derived from the hashes. */
70 /** Part of the tree currently checked. */
78 void RecoverProgress();
79 Sha1Hash DeriveRoot();
80 bool OfferPeakHash (bin64_t pos, const Sha1Hash& hash);
84 HashTree (const char* file_name, const Sha1Hash& root=Sha1Hash::ZERO,
85 const char* hash_filename=NULL);
87 /** Offer a hash; returns true if it verified; false otherwise.
88 Once it cannot be verified (no sibling or parent), the hash
89 is remembered, while returning false. */
90 bool OfferHash (bin64_t pos, const Sha1Hash& hash);
91 /** Offer data; the behavior is the same as with a hash:
92 accept or remember or drop. Returns true => ACK is sent. */
93 bool OfferData (bin64_t bin, const char* data, size_t length);
94 /** For live streaming. Not implemented yet. */
95 int AppendData (char* data, int length) ;
97 int file_descriptor () const { return fd_; }
98 /** Returns the number of peaks (read on peak hashes). */
99 int peak_count () const { return peak_count_; }
100 /** Returns the i-th peak's bin number. */
101 bin64_t peak (int i) const { return peaks_[i]; }
102 /** Returns peak hash #i. */
103 const Sha1Hash& peak_hash (int i) const { return peak_hashes_[i]; }
104 /** Return the peak bin the given bin belongs to. */
105 bin64_t peak_for (bin64_t pos) const;
106 /** Return a (Merkle) hash for the given bin. */
107 const Sha1Hash& hash (bin64_t pos) const {return hashes_[pos];}
108 /** Give the root hash, which is effectively an identifier of this file. */
109 const Sha1Hash& root_hash () const { return root_hash_; }
110 /** Get file size, in bytes. */
111 uint64_t size () const { return size_; }
112 /** Get file size in packets (in kilobytes, rounded up). */
113 uint64_t packet_size () const { return sizek_; }
114 /** Number of bytes retrieved and checked. */
115 uint64_t complete () const { return complete_; }
116 /** Number of packets retrieved and checked. */
117 uint64_t packets_complete () const { return completek_; }
118 /** The number of bytes completed sequentially, i.e. from the beginning of
119 the file, uninterrupted. */
120 uint64_t seq_complete () ;
121 /** Whether the file is complete. */
123 { return size_ && complete_==size_; }
124 /** The binmap of complete packets. */
125 binmap_t& ack_out () { return ack_out_; }