currrent
[swift-upb.git] / hashtree.cpp
1 /*
2  *  hashtree.cpp
3  *  serp++
4  *
5  *  Created by Victor Grishchenko on 3/6/09.
6  *  Copyright 2009 Delft University of Technology. All rights reserved.
7  *
8  */
9
10 #include "hashtree.h"
11 #include <openssl/sha.h>
12 #include <string.h>
13 #include <stdlib.h>
14 #include <fcntl.h>
15 #include "compat.h"
16
17 using namespace p2tp;
18
19 #define HASHSZ 20
20 const size_t Sha1Hash::SIZE = HASHSZ;
21 const Sha1Hash Sha1Hash::ZERO = Sha1Hash();
22
23 Sha1Hash::Sha1Hash(const Sha1Hash& left, const Sha1Hash& right) {
24         char data[HASHSZ*2];
25         memcpy(data,left.bits,SIZE);
26         memcpy(data+SIZE,right.bits,SIZE);
27         SHA1((unsigned char*)data,SIZE*2,bits);
28 }
29
30 Sha1Hash::Sha1Hash(const char* data, size_t length) {
31     if (length==-1)
32         length = strlen(data);
33         SHA1((unsigned char*)data,length,bits);
34 }
35
36 Sha1Hash::Sha1Hash(const uint8_t* data, size_t length) {
37         SHA1(data,length,bits);
38 }
39
40 Sha1Hash::Sha1Hash(bool hex, const char* hash) {
41         if (hex) {
42         char hx[3]; hx[2]=0;
43         int val;
44         for(int i=0; i<SIZE; i++) {
45             strncpy(hx,hash+i*2,2);
46             sscanf(hx, "%x", &val);
47             bits[i] = val;
48         }
49         assert(this->hex()==std::string(hash));
50     } else
51         memcpy(bits,hash,SIZE);
52 }
53
54 std::string     Sha1Hash::hex() const {
55         char hex[HASHSZ*2+1];
56         for(int i=0; i<HASHSZ; i++)
57                 sprintf(hex+i*2, "%02x", (int)(unsigned char)bits[i]);
58         return std::string(hex,HASHSZ*2);
59 }
60
61
62
63 /**     H a s h   t r e e       */
64
65
66 HashTree::HashTree (const char* filename, const Sha1Hash& root_hash, const char* hash_filename) :
67 root_hash_(root_hash), fd_(0), hash_fd_(0), data_recheck_(true),
68 peak_count_(0), hashes_(NULL), size_(0), sizek_(0),
69 complete_(0), completek_(0)
70 {
71         fd_ = open(filename,O_RDWR|O_CREAT,S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
72         if (fd_<0)
73         return;
74     char hfn[1024] = "";
75     if (!hash_filename) {
76         strcat(hfn, filename);
77         strcat(hfn, ".mhash");
78     } else
79         strcpy(hfn,hash_filename);
80     hash_fd_ = open(hfn,O_RDWR|O_CREAT,S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
81     if (hash_fd_<0)
82         return;
83     if (root_hash_==Sha1Hash::ZERO) { // fresh submit, hash it
84         assert(file_size(fd_));
85         Submit();
86     } else {
87         RecoverProgress();
88     } // else  LoadComplete()
89 }
90
91
92 void            HashTree::Submit () {
93     size_ = file_size(fd_);
94         sizek_ = (size_>>10) + ((size_&1023) ? 1 : 0);
95     peak_count_ = bin64_t::peaks(sizek_,peaks_);
96     int hashes_size = Sha1Hash::SIZE*sizek_*2;
97     file_resize(hash_fd_,hashes_size);
98     hashes_ = (Sha1Hash*) memory_map(hash_fd_,hashes_size);
99     if (!hashes_) {
100         size_ = sizek_ = complete_ = completek_ = 0;
101         print_error("mmap failed");
102         return;
103     }
104     for (size_t i=0; i<sizek_; i++) {
105         char kilo[1<<10];
106         size_t rd = read(fd_,kilo,1<<10);
107         if (rd<(1<<10) && i!=sizek_-1) {
108             free(hashes_);
109             hashes_=NULL;
110             return;
111         }
112         bin64_t pos(0,i);
113         hashes_[pos] = Sha1Hash(kilo,rd);
114         ack_out_.set(pos);
115         complete_+=rd;
116         completek_++;
117     }
118     for (int p=0; p<peak_count_; p++) {
119         if (!peaks_[p].is_base())
120             for(bin64_t b=peaks_[p].left_foot().parent(); b.within(peaks_[p]); b=b.next_dfsio(1))
121                 hashes_[b] = Sha1Hash(hashes_[b.left()],hashes_[b.right()]);
122         peak_hashes_[p] = hashes_[peaks_[p]];
123     }
124
125     root_hash_ = DeriveRoot();
126     
127 }
128
129
130 /** Basically, simulated receiving every single packet, except
131  for some optimizations. */
132 void            HashTree::RecoverProgress () {
133     size_t size = file_size(fd_);
134         size_t sizek = (size>>10) + ((size&1023) ? 1 : 0);
135     bin64_t peaks[64];
136     int peak_count = bin64_t::peaks(sizek,peaks);
137     for(int i=0; i<peak_count; i++) {
138         Sha1Hash peak_hash;
139         file_seek(hash_fd_,peaks[i]*sizeof(Sha1Hash));
140         if (read(hash_fd_,&peak_hash,sizeof(Sha1Hash))!=sizeof(Sha1Hash))
141             return;
142         OfferPeakHash(peaks[i], peak_hash);
143     }
144     if (!this->size())
145         return; // if no valid peak hashes found
146     // at this point, we may use mmapd hashes already
147     // so, lets verify hashes and the data we've got
148     char zeros[1<<10];
149     memset(zeros, 0, 1<<10);
150     Sha1Hash kilo_zero(zeros,1<<10);
151     for(int p=0; p<size_kilo(); p++) {
152         char buf[1<<10];
153         bin64_t pos(0,p);
154         if (hashes_[pos]==Sha1Hash::ZERO)
155             continue;
156         size_t rd = read(fd_,buf,1<<10);
157         assert(rd==(1<<10) || p==size_kilo()-1);
158         if (rd==(1<<10) && !memcmp(buf, zeros, rd) && hashes_[pos]!=kilo_zero)
159             continue;
160         if ( data_recheck_ && !OfferHash(pos, Sha1Hash(buf,rd)) )
161             continue;
162         ack_out_.set(pos);
163         completek_++;
164         complete_+=rd;
165     }
166 }
167
168
169 bool            HashTree::OfferPeakHash (bin64_t pos, const Sha1Hash& hash) {
170     assert(!size_);
171     if (peak_count_) {
172         bin64_t last_peak = peaks_[peak_count_-1];
173         if ( pos.layer()>=last_peak.layer() ||
174             pos.base_offset()!=last_peak.base_offset()+last_peak.width() )
175             peak_count_ = 0;
176     }
177     peaks_[peak_count_] = pos;
178     peak_hashes_[peak_count_] = hash;
179     peak_count_++;
180     // check whether peak hash candidates add up to the root hash
181     Sha1Hash mustbe_root = DeriveRoot();
182     if (mustbe_root!=root_hash_)
183         return false;
184     for(int i=0; i<peak_count_; i++)
185         sizek_ += peaks_[i].width();
186     
187     // bingo, we now know the file size (rounded up to a KByte)
188     
189     size_ = sizek_<<10;
190     completek_ = complete_ = 0;
191         sizek_ = (size_>>10) + ((size_&1023) ? 1 : 0);
192     
193     size_t cur_size = file_size(fd_);
194     if ( cur_size<=(sizek_-1)<<10  || cur_size>sizek_<<10 )
195         if (file_resize(fd_, size_)) {
196             print_error("cannot set file size\n");
197             size_=0; // remain in the 0-state
198             return false;
199         }
200     
201     // mmap the hash file into memory
202     size_t expected_size = sizeof(Sha1Hash)*sizek_*2;
203     if ( file_size(hash_fd_) != expected_size )
204         file_resize (hash_fd_, expected_size);
205     
206     hashes_ = (Sha1Hash*) memory_map(hash_fd_,expected_size);
207     if (!hashes_) {
208         size_ = sizek_ = complete_ = completek_ = 0;
209         print_error("mmap failed");
210         return false;
211     }
212     
213     for(int i=0; i<peak_count_; i++)
214         hashes_[peaks_[i]] = peak_hashes_[i];
215     return true;
216 }
217
218
219 Sha1Hash        HashTree::DeriveRoot () {
220         int c = peak_count_-1;
221         bin64_t p = peaks_[c];
222         Sha1Hash hash = peak_hashes_[c];
223         c--;
224         while (p!=bin64_t::ALL) {
225                 if (p.is_left()) {
226                         p = p.parent();
227                         hash = Sha1Hash(hash,Sha1Hash::ZERO);
228                 } else {
229                         if (c<0 || peaks_[c]!=p.sibling())
230                                 return Sha1Hash::ZERO;
231                         hash = Sha1Hash(peak_hashes_[c],hash);
232                         p = p.parent();
233                         c--;
234                 }
235         //dprintf("p %lli %s\n",(uint64_t)p,hash.hex().c_str());
236         }
237     return hash;
238 }
239
240
241 /** For live streaming: appends the data, adjusts the tree.
242     @ return the number of fresh (tail) peak hashes */
243 int         HashTree::AppendData (char* data, int length) {
244     return 0;
245 }
246
247
248 bin64_t         HashTree::peak_for (bin64_t pos) const {
249     int pi=0;
250     while (pi<peak_count_ && !pos.within(peaks_[pi]))
251         pi++;
252     return pi==peak_count_ ? bin64_t(bin64_t::NONE) : peaks_[pi];
253 }
254
255
256 bool            HashTree::OfferHash (bin64_t pos, const Sha1Hash& hash) {
257         if (!size_)  // only peak hashes are accepted at this point
258                 return OfferPeakHash(pos,hash);
259     bin64_t peak = peak_for(pos);
260     if (peak==bin64_t::NONE)
261         return false;
262     if (peak==pos)
263         return hash == hashes_[pos];
264     if (ack_out_.get(pos.parent())!=bins::EMPTY)
265         return hash==hashes_[pos]; // have this hash already, even accptd data
266         hashes_[pos] = hash;
267     if (!pos.is_base())
268         return false; // who cares?
269     bin64_t p = pos;
270     Sha1Hash uphash = hash;
271     while ( p!=peak && ack_out_.get(p)==bins::EMPTY ) {
272         hashes_[p] = uphash;
273         p = p.parent();
274         uphash = Sha1Hash(hashes_[p.left()],hashes_[p.right()]) ;
275     }// walk to the nearest proven hash
276     return uphash==hashes_[p];
277 }
278
279
280 bool            HashTree::OfferData (bin64_t pos, const char* data, size_t length) {
281     if (!size())
282         return false;
283     if (!pos.is_base())
284         return false;
285     if (length<1024 && pos!=bin64_t(0,sizek_-1))
286         return false;
287     if (ack_out_.get(pos)==bins::FILLED)
288         return true; // to set data_in_
289     bin64_t peak = peak_for(pos);
290     if (peak==bin64_t::NONE)
291         return false;
292
293     if (!OfferHash(pos, Sha1Hash(data,length)))
294         return false;
295     
296     //printf("g %lli %s\n",(uint64_t)pos,hash.hex().c_str());
297     ack_out_.set(pos,bins::FILLED);
298     pwrite(fd_,data,length,pos.base_offset()<<10);
299     complete_ += length;
300     completek_++;
301     if (pos.base_offset()==sizek_-1) {
302         size_ = ((sizek_-1)<<10) + length;
303         if (file_size(fd_)!=size_)
304             file_resize(fd_,size_);
305     }
306     return true;
307 }
308
309
310 uint64_t      HashTree::seq_complete () {
311     uint64_t seqk = ack_out_.seq_length();
312     if (seqk==sizek_)
313         return size_;
314     else
315         return seqk<<10;
316 }
317
318 HashTree::~HashTree () {
319     if (hashes_)
320         memory_unmap(hash_fd_, hashes_, sizek_*2*sizeof(Sha1Hash));
321     if (fd_)
322         close(fd_);
323 }
324