minus cwnd drop bug
[swift-upb.git] / sendrecv.cpp
1 /*
2  *  datasendrecv.cpp
3  *  serp++
4  *
5  *  Created by Victor Grishchenko on 3/6/09.
6  *  Copyright 2009 Delft University of Technology. All rights reserved.
7  *
8  */
9 #include <algorithm>
10 //#include <glog/logging.h>
11 #include "p2tp.h"
12 #include "compat/util.h"
13
14
15 using namespace p2tp;
16 using namespace std; // FIXME remove
17
18 /*
19  TODO  25 Oct 18:55
20  - move hint_out_, piece picking to piece picker (needed e.g. for the case of channel drop)
21  - ANY_LAYER
22  - range: ALL
23  - randomized testing of advanced ops (new testcase)
24  - PeerCwnd()
25  - bins hint_out_, tbqueue hint_out_ts_
26  
27  */
28
29 void    Channel::AddPeakHashes (Datagram& dgram) {
30         for(int i=0; i<file().peak_count(); i++) {
31         bin64_t peak = file().peak(i);
32                 dgram.Push8(P2TP_HASH);
33                 dgram.Push32((uint32_t)peak);
34                 dgram.PushHash(file().peak_hash(i));
35         //DLOG(INFO)<<"#"<<id<<" +pHASH"<<file().peak(i);
36         dprintf("%s #%i +phash (%i,%lli)\n",tintstr(),id,peak.layer(),peak.offset());
37         }
38 }
39
40
41 void    Channel::AddUncleHashes (Datagram& dgram, bin64_t pos) {
42     bin64_t peak = file().peak_for(pos);
43     while (pos!=peak && ack_in_.get(pos.parent())==bins::EMPTY) {
44         bin64_t uncle = pos.sibling();
45                 dgram.Push8(P2TP_HASH);
46                 dgram.Push32((uint32_t)uncle);
47                 dgram.PushHash( file().hash(uncle) );
48         //DLOG(INFO)<<"#"<<id<<" +uHASH"<<uncle;
49         dprintf("%s #%i +hash (%i,%lli)\n",tintstr(),id,uncle.layer(),uncle.offset());
50         pos = pos.parent();
51     }
52 }
53
54
55 bin64_t         Channel::DequeueHint () { // TODO: resilience
56     bin64_t send = bin64_t::NONE;
57     while (!hint_in_.empty() && send==bin64_t::NONE) {
58         bin64_t hint = hint_in_.front();
59         hint_in_.pop_front();
60         send = file().ack_out().find_filtered
61             (ack_in_,hint,0,bins::FILLED);
62         dprintf("%s #%i may_send %lli\n",tintstr(),id,send.base_offset());
63         if (send!=bin64_t::NONE)
64             while (send!=hint) {
65                 hint = hint.towards(send);
66                 hint_in_.push_front(hint.sibling());
67             }
68     }
69     return send;
70 }
71
72
73 /*void  Channel::CleanStaleHints () {
74         while ( !hint_out.empty() && file().ack_out().get(hint_out.front().bin)==bins::FILLED ) 
75                 hint_out.pop_front();  // FIXME must normally clear fulfilled entries
76         tint timed_out = NOW - cc_->RoundTripTime()*8;
77         while ( !hint_out.empty() && hint_out.front().time < timed_out ) {
78         file().picker()->Snubbed(hint_out.front().bin);
79                 hint_out.pop_front();
80         }
81 }*/
82
83
84 void    Channel::AddHandshake (Datagram& dgram) {
85         if (!peer_channel_id_) { // initiating
86                 dgram.Push8(P2TP_HASH);
87                 dgram.Push32(bin64_t::ALL32);
88                 dgram.PushHash(file().root_hash());
89         dprintf("%s #%i +hash ALL %s\n",
90                 tintstr(),id,file().root_hash().hex().c_str());
91         }
92         dgram.Push8(P2TP_HANDSHAKE);
93         dgram.Push32(EncodeID(id));
94     dprintf("%s #%i +hs\n",tintstr(),id);
95     ack_out_.clear();
96     AddAck(dgram);
97 }
98
99
100 void    Channel::ClearStaleDataOut() {
101     int oldsize = data_out_.size();
102     while ( data_out_.size() && data_out_.front().time < 
103            NOW - rtt_avg_ - dev_avg_*4 )
104         data_out_.pop_front();
105     if (data_out_.size()!=oldsize)
106         cc_->OnAckRcvd(bin64_t::NONE);
107 }
108
109
110 void    Channel::Send () {
111     Datagram dgram(socket_,peer());
112     dgram.Push32(peer_channel_id_);
113     bin64_t data = bin64_t::NONE;
114     if ( is_established() ) {
115         AddAck(dgram);
116         AddHint(dgram);
117         AddPex(dgram);
118         ClearStaleDataOut();
119         if (cc_->MaySendData()) 
120             data = AddData(dgram);
121         else
122             dprintf("%s #%i no cwnd\n",tintstr(),id);
123     } else {
124         AddHandshake(dgram);
125         AddAck(dgram);
126     }
127     dprintf("%s #%i sent %ib %s\n",tintstr(),id,dgram.size(),peer().str().c_str());
128     if (dgram.size()==4) // only the channel id; bare keep-alive
129         data = bin64_t::ALL;
130     cc_->OnDataSent(data);
131         if (dgram.Send()==-1)
132         print_error("can't send datagram");
133     last_send_time_ = NOW;
134     RequeueSend(cc_->NextSendTime());
135 }
136
137
138 void    Channel::AddHint (Datagram& dgram) {
139
140     while (!hint_out_.empty()) {
141         tintbin f = hint_out_.front();
142         if (f.time<NOW-rtt_avg_*8) {
143             hint_out_.pop_front();
144         } else {
145             int status = file().ack_out().get(f.bin);
146             if (status==bins::EMPTY) {
147                 break;
148             } else if (status==bins::FILLED) {
149                 hint_out_.pop_front();
150             } else { // mixed
151                 hint_out_.front().bin = f.bin.right();
152                 f.bin = f.bin.left();
153                 hint_out_.push_front(f);
154             }
155         }
156     }
157     /*while (!hint_out_.empty() &&
158             (hint_out_.front().time<NOW-TINT_SEC ||
159             file().ack_out().get(hint_out_.front().bin)==bins::FILLED ) ) {
160         file().picker().Expired(hint_out_.front().bin);
161         hint_out_.pop_front();
162     }*/
163     uint64_t hinted = 0;
164     for(tbqueue::iterator i=hint_out_.begin(); i!=hint_out_.end(); i++)
165         hinted += i->bin.width();
166     //int bps = PeerBPS();
167     //double kbps = max(4,TINT_SEC / dip_avg_);
168     double peer_cwnd = rtt_avg_ / dip_avg_;
169     if (peer_cwnd<1)
170         peer_cwnd = 1;
171     dprintf("%s #%i hinted %lli peer_cwnd %lli/%lli=%f\n",
172             tintstr(),id,hinted,rtt_avg_,dip_avg_,((float)rtt_avg_/dip_avg_));
173
174     if ( 8*peer_cwnd > hinted ) { //hinted*1024 < peer_cwnd*4 ) {
175         
176         uint8_t layer = 2; // actually, enough
177         bin64_t hint = transfer().picker().Pick(ack_in_,layer);
178         // FIXME FIXME FIXME: any layer
179         if (hint==bin64_t::NONE)
180             hint = transfer().picker().Pick(ack_in_,0);
181         
182         if (hint!=bin64_t::NONE) {
183             hint_out_.push_back(hint);
184             dgram.Push8(P2TP_HINT);
185             dgram.Push32(hint);
186             dprintf("%s #%i +hint (%i,%lli)\n",tintstr(),id,hint.layer(),hint.offset());
187         }
188         
189     }
190 }
191
192
193 bin64_t         Channel::AddData (Datagram& dgram) {
194         if (!file().size()) // know nothing
195                 return bin64_t::NONE;
196         bin64_t tosend = DequeueHint();
197     if (tosend==bin64_t::NONE) {
198         dprintf("%s #%i out of hints\n",tintstr(),id);
199         return bin64_t::NONE;
200     }
201     if (ack_in_.is_empty() && file().size())
202         AddPeakHashes(dgram);
203     AddUncleHashes(dgram,tosend);
204     uint8_t buf[1024];
205     size_t r = pread(file().file_descriptor(),buf,1024,tosend.base_offset()<<10); 
206     // TODO: ??? corrupted data, retries
207     if (r<0) {
208         print_error("error on reading");
209         return bin64_t::NONE;
210     }
211     assert(dgram.space()>=r+4+1);
212     dgram.Push8(P2TP_DATA);
213     dgram.Push32(tosend);
214     dgram.Push(buf,r);
215     dprintf("%s #%i +data (%lli)\n",tintstr(),id,tosend.base_offset());
216     data_out_.push_back(tosend);
217         return tosend;
218 }
219
220
221 void    Channel::AddTs (Datagram& dgram) {
222     dgram.Push8(P2TP_TS);
223     dgram.Push64(data_in_.time);
224     dprintf("%s #%i +ts %lli\n",tintstr(),id,data_in_.time);
225 }
226
227
228 void    Channel::AddAck (Datagram& dgram) {
229         if (data_in_.bin!=bin64_t::NONE) {
230         AddTs(dgram);
231         bin64_t pos = data_in_.bin;
232                 dgram.Push8(P2TP_ACK);
233                 dgram.Push32(pos);
234                 //dgram.Push64(data_in_.time);
235         ack_out_.set(pos);
236         dprintf("%s #%i +ack (%i,%lli) %s\n",tintstr(),id,
237                 pos.layer(),pos.offset(),tintstr(data_in_.time));
238         data_in_ = tintbin(0,bin64_t::NONE);
239         }
240     for(int count=0; count<4; count++) {
241         bin64_t ack = file().ack_out().find_filtered(ack_out_, bin64_t::ALL, 0, bins::FILLED);
242         // TODO bins::ANY_LAYER
243         if (ack==bin64_t::NONE)
244             break;
245         while (file().ack_out().get(ack.parent())==bins::FILLED)
246             ack = ack.parent();
247         ack_out_.set(ack);
248         dgram.Push8(P2TP_ACK);
249         dgram.Push32(ack);
250         dprintf("%s #%i +ack (%i,%lli)\n",tintstr(),id,ack.layer(),ack.offset());
251     }
252 }
253
254
255 void    Channel::Recv (Datagram& dgram) {
256     if (last_send_time_ && rtt_avg_==TINT_SEC && dev_avg_==0) {
257         rtt_avg_ = NOW - last_send_time_;
258         dev_avg_ = rtt_avg_;
259         dip_avg_ = rtt_avg_;
260         transfer().hs_in_.push_back(id);
261         dprintf("%s #%i rtt init %lli\n",tintstr(),id,rtt_avg_);
262     }
263     bin64_t data = dgram.size() ? bin64_t::NONE : bin64_t::ALL;
264         while (dgram.size()) {
265                 uint8_t type = dgram.Pull8();
266                 switch (type) {
267             case P2TP_HANDSHAKE: OnHandshake(dgram); break;
268                         case P2TP_DATA:         data=OnData(dgram); break;
269                         case P2TP_TS:       OnTs(dgram); break;
270                         case P2TP_ACK:          OnAck(dgram); break;
271                         case P2TP_HASH:         OnHash(dgram); break;
272                         case P2TP_HINT:         OnHint(dgram); break;
273             case P2TP_PEX_ADD:  OnPex(dgram); break;
274                         default:
275                                 //LOG(ERROR) << this->id_string() << " malformed datagram";
276                                 return;
277                 }
278         }
279     cc_->OnDataRecvd(data);
280     last_recv_time_ = NOW;
281     if (data!=bin64_t::ALL)
282         RequeueSend(NOW);
283 }
284
285
286 void    Channel::OnHash (Datagram& dgram) {
287         bin64_t pos = dgram.Pull32();
288         Sha1Hash hash = dgram.PullHash();
289         file().OfferHash(pos,hash);
290     //DLOG(INFO)<<"#"<<id<<" .HASH"<<(int)pos;
291     dprintf("%s #%i -hash (%i,%lli)\n",tintstr(),id,pos.layer(),pos.offset());
292 }
293
294
295 bin64_t Channel::OnData (Datagram& dgram) {
296         bin64_t pos = dgram.Pull32();
297     uint8_t *data;
298     int length = dgram.Pull(&data,1024);
299     bool ok = file().OfferData(pos, (char*)data, length) ;
300     dprintf("%s #%i %cdata (%lli)\n",tintstr(),id,ok?'-':'!',pos.offset());
301     if (ok) {
302         data_in_ = tintbin(NOW,pos);
303         if (last_recv_time_) {
304             tint dip = NOW - last_recv_time_;
305             dip_avg_ = ( dip_avg_*3 + dip ) >> 2;
306         }
307         return pos;
308     } else
309         return bin64_t::NONE;
310 }
311
312
313 void    Channel::OnAck (Datagram& dgram) {
314         bin64_t ackd_pos = dgram.Pull32();
315     if (ackd_pos.base_offset()>file().size())
316         return;
317     dprintf("%s #%i -ack (%i,%lli)\n",tintstr(),id,ackd_pos.layer(),ackd_pos.offset());
318     for (int i=0; i<8 && i<data_out_.size(); i++) 
319         if (data_out_[i].bin.within(ackd_pos)) {
320             tint rtt = NOW-data_out_[i].time;
321             rtt_avg_ = (rtt_avg_*3 + rtt) >> 2;
322             dev_avg_ = ( dev_avg_*3 + abs(rtt-rtt_avg_) ) >> 2;
323             dprintf("%s #%i rtt %lli dev %lli\n",
324                     tintstr(),id,rtt_avg_,dev_avg_);
325             cc_->OnAckRcvd(data_out_[i].bin);
326         }
327         ack_in_.set(ackd_pos);
328     while (data_out_.size() && ack_in_.get(data_out_.front().bin)==bins::FILLED)
329         data_out_.pop_front();
330 }
331
332
333 /*void  Channel::OnAckTs (Datagram& dgram) {  // FIXME:   OnTs
334         bin64_t pos = dgram.Pull32();
335     tint ts = dgram.Pull64();
336     // TODO sanity check
337     dprintf("%s #%i -ackts (%i,%lli) %s\n",
338             tintstr(),id,pos.layer(),pos.offset(),tintstr(ts));
339         ack_in_.set(pos);
340         cc_->OnAckRcvd(pos,ts);
341 }*/
342
343 void Channel::OnTs (Datagram& dgram) {
344     peer_send_time_ = dgram.Pull64();
345     dprintf("%s #%i -ts %lli\n",tintstr(),id,peer_send_time_);
346 }
347
348
349 void    Channel::OnHint (Datagram& dgram) {
350         bin64_t hint = dgram.Pull32();
351         hint_in_.push_back(hint);
352     //RequeueSend(cc_->OnHintRecvd(hint));
353     dprintf("%s #%i -hint (%i,%lli)\n",tintstr(),id,hint.layer(),hint.offset());
354 }
355
356
357 void Channel::OnHandshake (Datagram& dgram) {
358     peer_channel_id_ = dgram.Pull32();
359     dprintf("%s #%i -hs %i\n",tintstr(),id,peer_channel_id_);
360     // FUTURE: channel forking
361 }
362
363
364 void Channel::OnPex (Datagram& dgram) {
365     uint32_t ipv4 = dgram.Pull32();
366     uint16_t port = dgram.Pull16();
367     Address addr(ipv4,port);
368     dprintf("%s #%i -pex %s\n",tintstr(),id,addr.str().c_str());
369     transfer().OnPexIn(addr);
370 }
371
372
373 void    Channel::AddPex (Datagram& dgram) {
374     int chid = transfer().RevealChannel(pex_out_);
375     if (chid==-1 || chid==id)
376         return;
377     Address a = channels[chid]->peer();
378     dgram.Push8(P2TP_PEX_ADD);
379     dgram.Push32(a.ipv4());
380     dgram.Push16(a.port());
381     dprintf("%s #%i +pex %s\n",tintstr(),id,a.str().c_str());
382 }
383
384
385 void    Channel::Recv (int socket) {
386         Datagram data(socket);
387         data.Recv();
388         if (data.size()<4) 
389                 RETLOG("datagram shorter than 4 bytes");
390         uint32_t mych = data.Pull32();
391         Sha1Hash hash;
392         Channel* channel = NULL;
393         if (!mych) { // handshake initiated
394                 if (data.size()<1+4+1+4+Sha1Hash::SIZE) 
395                         RETLOG ("incorrect size initial handshake packet");
396                 uint8_t hashid = data.Pull8();
397                 if (hashid!=P2TP_HASH) 
398                         RETLOG ("no hash in the initial handshake");
399                 bin64_t pos = data.Pull32();
400                 if (pos!=bin64_t::ALL32) 
401                         RETLOG ("that is not the root hash");
402                 hash = data.PullHash();
403                 FileTransfer* file = FileTransfer::Find(hash);
404                 if (!file) 
405                         RETLOG ("hash unknown, no such file");
406         dprintf("%s #0 -hash ALL %s\n",tintstr(),hash.hex().c_str());
407         for(binqueue::iterator i=file->hs_in_.begin(); i!=file->hs_in_.end(); i++)
408             if (channels[*i] && channels[*i]->peer_==data.addr) 
409                 RETLOG("have a channel already");
410                 channel = new Channel(file, socket, data.address());
411         } else {
412                 mych = DecodeID(mych);
413                 if (mych>=channels.size()) {
414             eprintf("invalid channel #%i\n",mych);
415             return;
416         }
417                 channel = channels[mych];
418                 if (!channel) 
419                         RETLOG ("channel is closed");
420                 if (channel->peer() != data.address()) 
421                         RETLOG ("invalid peer address");
422         channel->own_id_mentioned_ = true;
423         }
424     dprintf("recvd %i bytes for %i\n",data.size(),channel->id);
425     channel->Recv(data);
426 }
427
428
429 bool tblater (const tintbin& a, const tintbin& b) {
430     return a.time > b.time;
431 }
432
433
434 void    Channel::RequeueSend (tint next_time) {
435     if (next_time==next_send_time_)
436         return;
437     next_send_time_ = next_time;
438     send_queue.push_back(tintbin(next_time,id));
439     push_heap(send_queue.begin(),send_queue.end(),tblater);
440     dprintf("%s requeue #%i for %s\n",tintstr(),id,tintstr(next_time));
441 }
442
443
444 void    Channel::Loop (tint howlong) {  
445         
446     tint limit = Datagram::Time() + howlong;
447     
448     do {
449
450         tint send_time(TINT_NEVER);
451         Channel* sender(NULL);
452         while (!send_queue.empty()) {
453             send_time = send_queue.front().time;
454             sender = channel((int)send_queue.front().bin);
455             if (sender && sender->next_send_time_==send_time)
456                 break;
457             sender = NULL; // it was a stale entry
458             pop_heap(send_queue.begin(), send_queue.end(), tblater);
459             send_queue.pop_back();
460         }
461         if (send_time>limit)
462             send_time = limit;
463         if ( sender && send_time <= NOW ) {
464             dprintf("%s #%i sch_send %s\n",tintstr(),sender->id,
465                     tintstr(send_time));
466             sender->Send();
467             pop_heap(send_queue.begin(), send_queue.end(), tblater);
468             send_queue.pop_back();
469         } else {
470             tint towait = send_time - NOW;
471             dprintf("%s waiting %lliusec\n",tintstr(),towait);
472             int rd = Datagram::Wait(socket_count,sockets,towait);
473             if (rd!=INVALID_SOCKET)
474                 Recv(rd);
475         }
476         
477     } while (Datagram::Time()<limit);
478         
479 }
480