5 * Created by Victor Grishchenko on 3/6/09.
6 * Copyright 2009 Delft University of Technology. All rights reserved.
11 //#include <openssl/sha.h>
19 #define OPENFLAGS O_RDWR|O_CREAT|_O_BINARY
21 #define OPENFLAGS O_RDWR|O_CREAT
25 using namespace swift;
28 const size_t Sha1Hash::SIZE = HASHSZ;
29 const Sha1Hash Sha1Hash::ZERO = Sha1Hash();
31 void SHA1 (const void *data, size_t length, unsigned char *hash) {
34 blk_SHA1_Update(&ctx, data, length);
35 blk_SHA1_Final(hash, &ctx);
38 Sha1Hash::Sha1Hash(const Sha1Hash& left, const Sha1Hash& right) {
40 memcpy(data,left.bits,SIZE);
41 memcpy(data+SIZE,right.bits,SIZE);
42 SHA1((unsigned char*)data,SIZE*2,bits);
45 Sha1Hash::Sha1Hash(const char* data, size_t length) {
47 length = strlen(data);
48 SHA1((unsigned char*)data,length,bits);
51 Sha1Hash::Sha1Hash(const uint8_t* data, size_t length) {
52 SHA1(data,length,bits);
55 Sha1Hash::Sha1Hash(bool hex, const char* hash) {
59 for(int i=0; i<SIZE; i++) {
60 strncpy(hx,hash+i*2,2);
61 sscanf(hx, "%x", &val);
64 assert(this->hex()==std::string(hash));
66 memcpy(bits,hash,SIZE);
69 std::string Sha1Hash::hex() const {
71 for(int i=0; i<HASHSZ; i++)
72 sprintf(hex+i*2, "%02x", (int)(unsigned char)bits[i]);
73 return std::string(hex,HASHSZ*2);
78 /** H a s h t r e e */
81 HashTree::HashTree (const char* filename, const Sha1Hash& root_hash, const char* hash_filename) :
82 root_hash_(root_hash), fd_(0), hash_fd_(0), data_recheck_(true),
83 peak_count_(0), hashes_(NULL), size_(0), sizek_(0),
84 complete_(0), completek_(0)
86 fd_ = open(filename,OPENFLAGS,S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
89 print_error("cannot open the file");
94 strcat(hfn, filename);
95 strcat(hfn, ".mhash");
97 strcpy(hfn,hash_filename);
98 hash_fd_ = open(hfn,OPENFLAGS,S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
101 print_error("cannot open hash file");
104 if (root_hash_==Sha1Hash::ZERO) { // fresh submit, hash it
105 assert(file_size(fd_));
109 } // else LoadComplete()
113 void HashTree::Submit () {
114 size_ = file_size(fd_);
115 sizek_ = (size_ + 1023) >> 10;
116 peak_count_ = bin64_t::peaks(sizek_,peaks_);
117 int hashes_size = Sha1Hash::SIZE*sizek_*2;
118 file_resize(hash_fd_,hashes_size);
119 hashes_ = (Sha1Hash*) memory_map(hash_fd_,hashes_size);
121 size_ = sizek_ = complete_ = completek_ = 0;
122 print_error("mmap failed");
125 for (size_t i=0; i<sizek_; i++) {
127 size_t rd = read(fd_,kilo,1<<10);
128 if (rd<(1<<10) && i!=sizek_-1) {
134 hashes_[pos] = Sha1Hash(kilo,rd);
139 for (int p=0; p<peak_count_; p++) {
140 if (!peaks_[p].is_base())
141 for(bin64_t b=peaks_[p].left_foot().parent(); b.within(peaks_[p]); b=b.next_dfsio(1))
142 hashes_[b] = Sha1Hash(hashes_[b.left()],hashes_[b.right()]);
143 peak_hashes_[p] = hashes_[peaks_[p]];
146 root_hash_ = DeriveRoot();
151 /** Basically, simulated receiving every single packet, except
152 for some optimizations. */
153 void HashTree::RecoverProgress () {
154 size_t size = file_size(fd_);
155 size_t sizek = (size + 1023) >> 10;
157 int peak_count = bin64_t::peaks(sizek,peaks);
158 for(int i=0; i<peak_count; i++) {
160 file_seek(hash_fd_,peaks[i]*sizeof(Sha1Hash));
161 if (read(hash_fd_,&peak_hash,sizeof(Sha1Hash))!=sizeof(Sha1Hash))
163 OfferPeakHash(peaks[i], peak_hash);
166 return; // if no valid peak hashes found
167 // at this point, we may use mmapd hashes already
168 // so, lets verify hashes and the data we've got
170 memset(zeros, 0, 1<<10);
171 Sha1Hash kilo_zero(zeros,1<<10);
172 for(int p=0; p<packet_size(); p++) {
175 if (hashes_[pos]==Sha1Hash::ZERO)
177 size_t rd = read(fd_,buf,1<<10);
178 if (rd!=(1<<10) && p!=packet_size()-1)
180 if (rd==(1<<10) && !memcmp(buf, zeros, rd) &&
181 hashes_[pos]!=kilo_zero) // FIXME
183 if ( data_recheck_ && !OfferHash(pos, Sha1Hash(buf,rd)) )
188 if (rd!=(1<<10) && p==packet_size()-1)
189 size_ = ((sizek_-1)<<10) + rd;
194 bool HashTree::OfferPeakHash (bin64_t pos, const Sha1Hash& hash) {
197 bin64_t last_peak = peaks_[peak_count_-1];
198 if ( pos.layer()>=last_peak.layer() ||
199 pos.base_offset()!=last_peak.base_offset()+last_peak.width() )
202 peaks_[peak_count_] = pos;
203 peak_hashes_[peak_count_] = hash;
205 // check whether peak hash candidates add up to the root hash
206 Sha1Hash mustbe_root = DeriveRoot();
207 if (mustbe_root!=root_hash_)
209 for(int i=0; i<peak_count_; i++)
210 sizek_ += peaks_[i].width();
212 // bingo, we now know the file size (rounded up to a KByte)
215 completek_ = complete_ = 0;
216 sizek_ = (size_ + 1023) >> 10;
218 size_t cur_size = file_size(fd_);
219 if ( cur_size<=(sizek_-1)<<10 || cur_size>sizek_<<10 )
220 if (file_resize(fd_, size_)) {
221 print_error("cannot set file size\n");
222 size_=0; // remain in the 0-state
226 // mmap the hash file into memory
227 size_t expected_size = sizeof(Sha1Hash)*sizek_*2;
228 if ( file_size(hash_fd_) != expected_size )
229 file_resize (hash_fd_, expected_size);
231 hashes_ = (Sha1Hash*) memory_map(hash_fd_,expected_size);
233 size_ = sizek_ = complete_ = completek_ = 0;
234 print_error("mmap failed");
238 for(int i=0; i<peak_count_; i++)
239 hashes_[peaks_[i]] = peak_hashes_[i];
244 Sha1Hash HashTree::DeriveRoot () {
245 int c = peak_count_-1;
246 bin64_t p = peaks_[c];
247 Sha1Hash hash = peak_hashes_[c];
249 while (p!=bin64_t::ALL) {
252 hash = Sha1Hash(hash,Sha1Hash::ZERO);
254 if (c<0 || peaks_[c]!=p.sibling())
255 return Sha1Hash::ZERO;
256 hash = Sha1Hash(peak_hashes_[c],hash);
260 //dprintf("p %lli %s\n",(uint64_t)p,hash.hex().c_str());
266 /** For live streaming: appends the data, adjusts the tree.
267 @ return the number of fresh (tail) peak hashes */
268 int HashTree::AppendData (char* data, int length) {
273 bin64_t HashTree::peak_for (bin64_t pos) const {
275 while (pi<peak_count_ && !pos.within(peaks_[pi]))
277 return pi==peak_count_ ? bin64_t(bin64_t::NONE) : peaks_[pi];
281 bool HashTree::OfferHash (bin64_t pos, const Sha1Hash& hash) {
282 if (!size_) // only peak hashes are accepted at this point
283 return OfferPeakHash(pos,hash);
284 bin64_t peak = peak_for(pos);
285 if (peak==bin64_t::NONE)
288 return hash == hashes_[pos];
289 if (ack_out_.get(pos.parent())!=binmap_t::EMPTY)
290 return hash==hashes_[pos]; // have this hash already, even accptd data
293 return false; // who cares?
295 Sha1Hash uphash = hash;
296 while ( p!=peak && ack_out_.get(p)==binmap_t::EMPTY ) {
299 uphash = Sha1Hash(hashes_[p.left()],hashes_[p.right()]) ;
300 }// walk to the nearest proven hash
301 return uphash==hashes_[p];
305 bool HashTree::OfferData (bin64_t pos, const char* data, size_t length) {
310 if (length<1024 && pos!=bin64_t(0,sizek_-1))
312 if (ack_out_.get(pos)==binmap_t::FILLED)
313 return true; // to set data_in_
314 bin64_t peak = peak_for(pos);
315 if (peak==bin64_t::NONE)
318 Sha1Hash data_hash(data,length);
319 if (!OfferHash(pos, data_hash)) {
320 printf("invalid hash for %s: %s\n",pos.str(),data_hash.hex().c_str());
324 //printf("g %lli %s\n",(uint64_t)pos,hash.hex().c_str());
325 ack_out_.set(pos,binmap_t::FILLED);
326 pwrite(fd_,data,length,pos.base_offset()<<10);
329 if (pos.base_offset()==sizek_-1) {
330 size_ = ((sizek_-1)<<10) + length;
331 if (file_size(fd_)!=size_)
332 file_resize(fd_,size_);
338 uint64_t HashTree::seq_complete () {
339 uint64_t seqk = ack_out_.seq_length();
346 HashTree::~HashTree () {
348 memory_unmap(hash_fd_, hashes_, sizek_*2*sizeof(Sha1Hash));