3 * hashing, Merkle hash trees and data integrity
5 * Created by Victor Grishchenko on 3/6/09.
6 * Copyright 2009-2012 TECHNISCHE UNIVERSITEIT DELFT. All rights reserved.
9 #ifndef SWIFT_SHA1_HASH_TREE_H
10 #define SWIFT_SHA1_HASH_TREE_H
20 /** SHA-1 hash, 20 bytes of data */
24 Sha1Hash() { memset(bits,0,HASHSZ); }
25 /** Make a hash of two hashes (for building Merkle hash trees). */
26 Sha1Hash(const Sha1Hash& left, const Sha1Hash& right);
27 /** Hash an old plain string. */
28 Sha1Hash(const char* str, size_t length=-1);
29 Sha1Hash(const uint8_t* data, size_t length);
30 /** Either parse hash from hex representation of read in raw format. */
31 Sha1Hash(bool hex, const char* hash);
33 std::string hex() const;
34 bool operator == (const Sha1Hash& b) const
35 { return 0==memcmp(bits,b.bits,SIZE); }
36 bool operator != (const Sha1Hash& b) const { return !(*this==b); }
37 const char* operator * () const { return (char*) bits; }
39 const static Sha1Hash ZERO;
40 const static size_t SIZE = HASHSZ;
43 // Arno: The chunk size parameter can now be configured via the constructor,
44 // for values up to 8192. Above that you'll have to edit the
45 // SWIFT_MAX_SEND_DGRAM_SIZE in swift.h
47 #define SWIFT_DEFAULT_CHUNK_SIZE 1024
50 /** This class controls data integrity of some file; hash tree is put to
51 an auxilliary file next to it. The hash tree file is mmap'd for
52 performance reasons. Actually, I'd like the data file itself to be
53 mmap'd, but 32-bit platforms do not allow that for bigger files.
55 There are two variants of the general workflow: either a HashTree
56 is initialized with a root hash and the rest of hashes and data is
57 spoon-fed later, OR a HashTree is initialized with a data file, so
58 the hash tree is derived, including the root hash.
60 class HashTree : Serializable {
61 /** Merkle hash tree: root */
64 /** Merkle hash tree: peak hashes */
65 Sha1Hash peak_hashes_[64];
68 /** File descriptor to put hashes to */
71 std::string filename_; // for easy serialization
72 /** Base size, as derived from the hashes. */
75 /** Part of the tree currently checked. */
78 /** Binmap of own chunk availability */
82 /** Arno: configurable fixed chunk size in bytes */
86 binmap_t is_hash_verified_; // binmap being abused as bitmap, only layer 0 used
87 // FAXME: make is_hash_verified_ part of persistent state?
89 int internal_deserialize(FILE *fp,bool contentavail=true);
92 bool check_netwvshash_;
97 void RecoverProgress();
98 bool RecoverPeakHashes();
99 Sha1Hash DeriveRoot();
100 bool OfferPeakHash (bin_t pos, const Sha1Hash& hash);
105 HashTree (const char* file_name, const Sha1Hash& root=Sha1Hash::ZERO, uint32_t chunk_size=SWIFT_DEFAULT_CHUNK_SIZE,
106 const char* hash_filename=NULL, bool force_check_diskvshash=true, bool check_netwvshash=true, const char* binmap_filename=NULL);
108 // Arno, 2012-01-03: Hack to quickly learn root hash from a checkpoint
109 HashTree (bool dummy, const char* binmap_filename);
111 /** Offer a hash; returns true if it verified; false otherwise.
112 Once it cannot be verified (no sibling or parent), the hash
113 is remembered, while returning false. */
114 bool OfferHash (bin_t pos, const Sha1Hash& hash);
115 /** Offer data; the behavior is the same as with a hash:
116 accept or remember or drop. Returns true => ACK is sent. */
117 bool OfferData (bin_t bin, const char* data, size_t length);
118 /** For live streaming. Not implemented yet. */
119 int AppendData (char* data, int length) ;
121 int file_descriptor () const { return fd_; }
122 /** Returns the number of peaks (read on peak hashes). */
123 int peak_count () const { return peak_count_; }
124 /** Returns the i-th peak's bin number. */
125 bin_t peak (int i) const { return peaks_[i]; }
126 /** Returns peak hash #i. */
127 const Sha1Hash& peak_hash (int i) const { return peak_hashes_[i]; }
128 /** Return the peak bin the given bin belongs to. */
129 bin_t peak_for (bin_t pos) const;
130 /** Return a (Merkle) hash for the given bin. */
131 const Sha1Hash& hash (bin_t pos) const {return hashes_[pos.toUInt()];}
132 /** Give the root hash, which is effectively an identifier of this file. */
133 const Sha1Hash& root_hash () const { return root_hash_; }
134 /** Get file size, in bytes. */
135 uint64_t size () const { return size_; }
136 /** Get file size in chunks (in kilobytes, rounded up). */
137 uint64_t size_in_chunks () const { return sizec_; }
138 /** Number of bytes retrieved and checked. */
139 uint64_t complete () const { return complete_; }
140 /** Number of chunks retrieved and checked. */
141 uint64_t chunks_complete () const { return completec_; }
142 /** The number of bytes completed sequentially, i.e. from the beginning of
143 the file, uninterrupted. */
144 uint64_t seq_complete () ;
145 /** Whether the file is complete. */
147 { return size_ && complete_==size_; }
148 /** The binmap of complete chunks. */
149 binmap_t& ack_out () { return ack_out_; }
150 std::string filename() { return filename_; } // Arno
151 uint32_t chunk_size() { return chunk_size_; } // CHUNKSIZE
154 // Arno: persistent storage for state other than hashes (which are in .mhash)
155 int serialize(FILE *fp);
156 int deserialize(FILE *fp);
157 int partial_deserialize(FILE *fp);
160 bool get_check_netwvshash() { return check_netwvshash_; }