cured several connection stall bugs
author Victor Grishchenko <victor.grishchenko@gmail.com>
Wed, 16 Dec 2009 15:06:53 +0000 (16:06 +0100)
committer Victor Grishchenko <victor.grishchenko@gmail.com>
Wed, 16 Dec 2009 15:06:53 +0000 (16:06 +0100)
BUGS
ChangeLog [changed mode: 0644->0755]
bins.cpp
doc/cloud4.jpg [deleted file]
doc/index.html
doc/style.css
p2tp.cpp
p2tp.h
send_control.cpp
sendrecv.cpp
tests/connecttest.cpp

diff --git a/BUGS b/BUGS
index 12d3b41..28b97ba 100644 (file)
--- a/BUGS
+++ b/BUGS
     v whether sending is limited by cwnd or app
     * actually: whether packets are ACKed faster than sent
     * uproot DATA NONE: complicates and deceives
-    * r735 goes to github
+    * r735 goes to github; r741
+    * receiver is swapping => strange behavior
+    v on high losses cwnd goes to silly fractions => slows down recovery
+    * code the pingpong<->keepalive<->slowstart transition
+    * empty datagram hammering (see at linode)
+    * make a testkit!!!
+    * never back from keepalive syndrome (because of underhashing)
old mode 100644 (file)
new mode 100755 (executable)
index 3c4ca6e..ee5fbf8 100644 (file)
--- a/bins.cpp
+++ b/bins.cpp
@@ -353,6 +353,8 @@ void    bins::remove (bins& b) {
 
 
 bin64_t     bins::cover(bin64_t val) {
+    if (val==bin64_t::NONE)
+        return val;
     iterator i(this,val,false);
     while (i.pos!=val && !i.solid())
         i.towards(val);
diff --git a/doc/cloud4.jpg b/doc/cloud4.jpg
deleted file mode 100644 (file)
index 1164ec5..0000000
Binary files a/doc/cloud4.jpg and /dev/null differ
index eec7d96..c034b08 100644 (file)
@@ -32,7 +32,7 @@
                wise people say</a>, the Internet was initially built for
                remotely connecting scientists to expensive supercomputers (whose computing
                power was comparable to modern cell phones). Thus, they supported the abstraction
-               of <i>conversation</i>. Currently, the Internet is mostly used for disseminating
+               of <i>conversation</i>. Currently, the Internet is mostly used for <i>disseminating</i>
                content to the masses, which mismatch definitely creates some problems.</p>
                
                <p>The <i>swift</i> protocol is a content-centric multiparty transport
                </p>
        </div>
        
-       <div class=fold>        <h2>Contacts&feedback</h2>
+       <div class=fold>        <h2>Contacts&amp;feedback</h2>
                <p><a href="mailto:victor.grishchenko@gmail.com">mail us</a></p>
                <p>subscribe to a mailing list</p>
        </div>
+       
+       </div>
 
 </body>
 </html>
\ No newline at end of file
index aec68ea..1295156 100644 (file)
@@ -1,10 +1,5 @@
 body {
-       background: #fefeff url("cloud4.jpg") fixed no-repeat;
-       #background-size: 100%; #
-       #-moz-background-size: 100% 100%;       /* Gecko 1.9.2 (Firefox 3.6) */
-    #-o-background-size: 100% 100%;         /* Opera 9.5 */
-    #-webkit-background-size: 100% 100%;    /* Safari 3.0 */
-    #-khtml-background-size: 100% 100%;     /* Konqueror 3.5.4 */
+       background: white no-repeat fixed;
        color: black;
 }
 
@@ -17,12 +12,12 @@ p {
 }
 
 body > div {
-       width: 60%;
+       width: 60em;
        margin: auto;
        margin-top: 64px;
        margin-bottom: 64px;
        #background: #d0e0ff;
-       background: rgba(208,224,255,0.8);
+       background: rgba(208,224,255,0.9);
        padding-top: 16px;
        padding-bottom: 16px;
 }
index 66984fd..ee42009 100644 (file)
--- a/p2tp.cpp
+++ b/p2tp.cpp
@@ -28,6 +28,7 @@ using namespace p2tp;
 
 p2tp::tint Channel::last_tick = 0;
 int Channel::MAX_REORDERING = 4;
+bool Channel::SELF_CONN_OK = false;
 p2tp::tint Channel::TIMEOUT = TINT_SEC*60;
 std::vector<Channel*> Channel::channels(1);
 SOCKET Channel::sockets[8] = {0,0,0,0,0,0,0,0};
@@ -46,7 +47,9 @@ Channel::Channel    (FileTransfer* transfer, int socket, Address peer_addr) :
     data_in_dbl_(bin64_t::NONE), hint_out_size_(0),
     cwnd_(1), send_interval_(TINT_SEC), send_control_(PING_PONG_CONTROL),
     sent_since_recv_(0), ack_rcvd_recent_(0), ack_not_rcvd_recent_(0),
-    last_loss_time_(0), owd_min_bin_(0), owd_min_bin_start_(NOW), owd_cur_bin_(0)
+    last_loss_time_(0), owd_min_bin_(0), owd_min_bin_start_(NOW), 
+    owd_cur_bin_(0), dgrams_sent_(0), dgrams_rcvd_(0), 
+    data_in_(TINT_NEVER,bin64_t::NONE)
 {
     if (peer_==Address())
         peer_ = tracker;
diff --git a/p2tp.h b/p2tp.h
index 95354aa..3d7f88c 100644 (file)
--- a/p2tp.h
+++ b/p2tp.h
@@ -269,6 +269,7 @@ namespace p2tp {
         static tint LEDBAT_TARGET;
         static float LEDBAT_GAIN;
         static tint LEDBAT_DELAY_BIN;
+        static bool SELF_CONN_OK;
         
         const std::string id_string () const;
         /** A channel is "established" if had already sent and received packets. */
@@ -346,6 +347,9 @@ namespace p2tp {
         tint        owd_min_bin_start_;
         tint        owd_current_[4];
         int         owd_cur_bin_;
+        /** Stats */
+        int         dgrams_sent_;
+        int         dgrams_rcvd_;
 
         int         PeerBPS() const {
             return TINT_SEC / dip_avg_ * 1024;
index 75c43c4..7111279 100644 (file)
@@ -33,7 +33,8 @@ tint    Channel::NextSendTime () {
 }
 
 tint    Channel::SwitchSendControl (int control_mode) {
-    dprintf("%s #%u sendctrl %i->%i\n",tintstr(),id,send_control_,control_mode);
+    dprintf("%s #%u sendctrl switch %s->%s\n",tintstr(),id,
+            SEND_CONTROL_MODES[send_control_],SEND_CONTROL_MODES[control_mode]);
     switch (control_mode) {
         case KEEP_ALIVE_CONTROL:
             send_interval_ = max(TINT_SEC/10,rtt_avg_);
@@ -45,6 +46,7 @@ tint    Channel::SwitchSendControl (int control_mode) {
             cwnd_ = 1;
             break;
         case SLOW_START_CONTROL:
+            cwnd_ = 1;
             break;
         case AIMD_CONTROL:
             break;
@@ -57,14 +59,12 @@ tint    Channel::SwitchSendControl (int control_mode) {
     return NextSendTime();
 }
 
-// TODO: transitions, consistently
-// TODO: may send data
 tint    Channel::KeepAliveNextSendTime () {
     if (sent_since_recv_>=3 && last_recv_time_<NOW-TINT_MIN)
         return TINT_NEVER;
     if (ack_rcvd_recent_)
         return SwitchSendControl(SLOW_START_CONTROL);
-    if (data_in_.bin!=bin64_t::NONE)
+    if (data_in_.time!=TINT_NEVER)
         return NOW;
     send_interval_ <<= 1;
     if (send_interval_>MAX_SEND_INTERVAL)
@@ -73,33 +73,33 @@ tint    Channel::KeepAliveNextSendTime () {
 }
 
 tint    Channel::PingPongNextSendTime () { // FIXME INFINITE LOOP
-    if (last_recv_time_ < last_send_time_-TINT_SEC*3) {
-        // FIXME keepalive <-> pingpong (peers, transition)
-    } // last_data_out_time_ < last_send_time_ - TINT_SEC...
-    if (false)
+    if (dgrams_sent_>=10)
         return SwitchSendControl(KEEP_ALIVE_CONTROL);
     if (ack_rcvd_recent_)
         return SwitchSendControl(SLOW_START_CONTROL);
-    if (data_in_.bin!=bin64_t::NONE)
+    if (data_in_.time!=TINT_NEVER)
         return NOW;
     if (last_recv_time_>last_send_time_)
         return NOW;
-    else if (last_send_time_)
-        return last_send_time_ + ack_timeout();
-    else
+    if (!last_send_time_)
         return NOW;
+    return last_send_time_ + ack_timeout(); // timeout
 }
 
 tint    Channel::CwndRateNextSendTime () {
-    if (data_in_.bin!=bin64_t::NONE)
+    if (data_in_.time!=TINT_NEVER)
         return NOW; // TODO: delayed ACKs
+    //if (last_recv_time_<NOW-rtt_avg_*4)
+    //    return SwitchSendControl(KEEP_ALIVE_CONTROL);
     send_interval_ = rtt_avg_/cwnd_;
+    if (send_interval_>std::max(rtt_avg_,TINT_SEC)*4)
+        return SwitchSendControl(KEEP_ALIVE_CONTROL);
     if (data_out_.size()<cwnd_) {
-       dprintf("%s #%u sendctrl next in %llius\n",tintstr(),id,send_interval_);
+        dprintf("%s #%u sendctrl next in %llius\n",tintstr(),id,send_interval_);
         return last_data_out_time_ + send_interval_;
     } else {
-        tint next_timeout = data_out_.front().time + ack_timeout();
-        return last_data_out_time_ + next_timeout;
+        assert(data_out_.front().time!=TINT_NEVER);
+        return data_out_.front().time + ack_timeout();
     }
 }
 
@@ -109,7 +109,7 @@ void    Channel::BackOffOnLosses () {
     if (last_loss_time_<NOW-rtt_avg_) {
         cwnd_ /= 2;
         last_loss_time_ = NOW;
-       dprintf("%s #%u sendctrl backoff %3.2f\n",tintstr(),id,cwnd_);
+        dprintf("%s #%u sendctrl backoff %3.2f\n",tintstr(),id,cwnd_);
     }
 }
 
index 7e1cec2..84577c0 100644 (file)
@@ -33,7 +33,7 @@ void    Channel::AddPeakHashes (Datagram& dgram) {
 
 void    Channel::AddUncleHashes (Datagram& dgram, bin64_t pos) {
     bin64_t peak = file().peak_for(pos);
-    while (pos!=peak && ((NOW&3)==3 || !data_out_cap_.within(pos.parent())) &&
+    while (pos!=peak && /*((NOW&3)==3 || !data_out_cap_.within(pos.parent())) &&*/
             ack_in_.get(pos.parent())==bins::EMPTY) {
         bin64_t uncle = pos.sibling();
         dgram.Push8(P2TP_HASH);
@@ -123,9 +123,11 @@ void    Channel::Send () {
             tintstr(),id,dgram.size(),peer().str(),peer_channel_id_);
     if (dgram.size()==4) {// only the channel id; bare keep-alive
         data = bin64_t::ALL;
-        if (send_control_!=KEEP_ALIVE_CONTROL) {
-            if ( (cwnd_/=2) < 1 )
+        if (data_out_.size()<cwnd_ && send_control_!=KEEP_ALIVE_CONTROL) {
+            if ( cwnd_ < 1 )
                 SwitchSendControl(KEEP_ALIVE_CONTROL);
+            else
+                cwnd_ /= 2;
         }
         //if (data_out_.empty() && send_control_!=KEEP_ALIVE_CONTROL)
         //     SwitchSendControl(KEEP_ALIVE_CONTROL);// we did our best
@@ -136,6 +138,7 @@ void    Channel::Send () {
         print_error("can't send datagram");
     last_send_time_ = NOW;
     sent_since_recv_++;
+    dgrams_sent_++;
 }
 
 
@@ -185,16 +188,14 @@ bin64_t        Channel::AddData (Datagram& dgram) {
     } else
         dprintf("%s #%u no cwnd #sendctrl\n",tintstr(),id);
     
-    if (tosend==bin64_t::NONE && (last_data_out_time_>NOW-TINT_SEC || data_out_.empty())) 
-        return bin64_t::NONE; // once in a while, empty data is sent just to check rtt FIXME
+    if (tosend==bin64_t::NONE)// && (last_data_out_time_>NOW-TINT_SEC || data_out_.empty())) 
+        return bin64_t::NONE; // once in a while, empty data is sent just to check rtt FIXED
     
-    if (tosend!=bin64_t::NONE) { // hashes
-        if (ack_in_.is_empty() && file().size())
-            AddPeakHashes(dgram);
-        AddUncleHashes(dgram,tosend);
-        if (!ack_in_.is_empty()) // TODO: cwnd_>1
-            data_out_cap_ = tosend;
-    }
+    if (ack_in_.is_empty() && file().size())
+        AddPeakHashes(dgram);
+    AddUncleHashes(dgram,tosend);
+    if (!ack_in_.is_empty()) // TODO: cwnd_>1
+        data_out_cap_ = tosend;
 
     if (dgram.size()>254) {
         dgram.Send(); // kind of fragmentation
@@ -204,17 +205,15 @@ bin64_t        Channel::AddData (Datagram& dgram) {
     dgram.Push8(P2TP_DATA);
     dgram.Push32(tosend.to32());
     
-    if (tosend!=bin64_t::NONE) { // data
-        uint8_t buf[1024];
-        size_t r = pread(file().file_descriptor(),buf,1024,tosend.base_offset()<<10); 
-        // TODO: corrupted data, retries, caching
-        if (r<0) {
-            print_error("error on reading");
-            return bin64_t::NONE;
-        }
-        assert(dgram.space()>=r+4+1);
-        dgram.Push(buf,r);
+    uint8_t buf[1024];
+    size_t r = pread(file().file_descriptor(),buf,1024,tosend.base_offset()<<10); 
+    // TODO: corrupted data, retries, caching
+    if (r<0) {
+        print_error("error on reading");
+        return bin64_t::NONE;
     }
+    assert(dgram.space()>=r+4+1);
+    dgram.Push(buf,r);
     
     last_data_out_time_ = NOW;
     data_out_.push_back(tosend);
@@ -232,20 +231,20 @@ void    Channel::AddTs (Datagram& dgram) {
 
 
 void    Channel::AddAck (Datagram& dgram) {
-    if (data_in_dbl_!=bin64_t::NONE) {
+    if (data_in_dbl_!=bin64_t::NONE) { // TODO: do redundancy better
         dgram.Push8(P2TP_ACK);
-        dgram.Push32(data_in_dbl_);
+        dgram.Push32(data_in_dbl_.to32());
         data_in_dbl_=bin64_t::NONE;
     }
-    if (data_in_.bin!=bin64_t::NONE) {
+    if (data_in_.time!=TINT_NEVER) { // TODO: ACK NONE for corrupted data
         AddTs(dgram);
         bin64_t pos = file().ack_out().cover(data_in_.bin);
         dgram.Push8(P2TP_ACK);
-        dgram.Push32(pos);
+        dgram.Push32(pos.to32());
         //dgram.Push64(data_in_.time);
         ack_out_.set(pos);
         dprintf("%s #%u +ack %s %s\n",tintstr(),id,pos.str(),tintstr(data_in_.time));
-        data_in_ = tintbin(0,bin64_t::NONE);
+        data_in_ = tintbin(TINT_NEVER,bin64_t::NONE);
         if (pos.layer()>2)
             data_in_dbl_ = pos;
     }
@@ -256,7 +255,7 @@ void    Channel::AddAck (Datagram& dgram) {
         ack = file().ack_out().cover(ack);
         ack_out_.set(ack);
         dgram.Push8(P2TP_ACK);
-        dgram.Push32(ack);
+        dgram.Push32(ack.to32());
         dprintf("%s #%u +ack %s\n",tintstr(),id,ack.str());
     }
 }
@@ -265,6 +264,7 @@ void    Channel::AddAck (Datagram& dgram) {
 void    Channel::Recv (Datagram& dgram) {
     dprintf("%s #%u recvd %i\n",tintstr(),id,dgram.size()+4);
     peer_send_time_ = 0; // has scope of 1 datagram
+    dgrams_rcvd_++;
     if (last_send_time_ && rtt_avg_==TINT_SEC && dev_avg_==0) {
         rtt_avg_ = NOW - last_send_time_;
         dev_avg_ = rtt_avg_;
@@ -328,9 +328,10 @@ bin64_t Channel::OnData (Datagram& dgram) {
     int length = dgram.Pull(&data,1024);
     bool ok = (pos==bin64_t::NONE) || file().OfferData(pos, (char*)data, length) ;
     dprintf("%s #%u %cdata %s\n",tintstr(),id,ok?'-':'!',pos.str());
+    data_in_ = tintbin(NOW,bin64_t::NONE);
     if (!ok) 
         return bin64_t::NONE;
-    data_in_ = tintbin(NOW,pos);
+    data_in_.bin = pos;
     if (pos!=bin64_t::NONE) {
         if (last_data_in_time_) {
             tint dip = NOW - last_data_in_time_;
@@ -400,7 +401,9 @@ void    Channel::CleanDataOut (bin64_t ackd_pos) { // TODO: isn't it too long?
 
 void    Channel::OnAck (Datagram& dgram) {
     bin64_t ackd_pos = dgram.Pull32();
-    if (ackd_pos!=bin64_t::NONE && file().size() && ackd_pos.base_offset()>=file().packet_size()) {
+    if (ackd_pos==bin64_t::NONE)
+        return; // likely, broken packet / insufficient hashes
+    if (file().size() && ackd_pos.base_offset()>=file().packet_size()) {
         eprintf("invalid ack: %s\n",ackd_pos.str());
         return;
     }
@@ -428,6 +431,14 @@ void    Channel::OnHint (Datagram& dgram) {
 void Channel::OnHandshake (Datagram& dgram) {
     peer_channel_id_ = dgram.Pull32();
     dprintf("%s #%u -hs %x\n",tintstr(),id,peer_channel_id_);
+    // self-connection check
+    if (!SELF_CONN_OK) {
+        uint32_t try_id = DecodeID(peer_channel_id_);
+        if (channel(try_id) && !channel(try_id)->peer_channel_id_) {
+            delete this;
+            return;
+        }
+    }
     // FUTURE: channel forking
 }
 
@@ -550,9 +561,10 @@ void    Channel::Loop (tint howlong) {
  
 void Channel::Reschedule () {
     next_send_time_ = NextSendTime();
-    if (next_send_time_!=TINT_NEVER)
+    if (next_send_time_!=TINT_NEVER) {
+        assert(next_send_time_<NOW+TINT_MIN);
         send_queue.push(tintbin(next_send_time_,id));
-    else
+    else
         send_queue.push(tintbin(NOW+TINT_MIN,id));
     dprintf("%s requeue #%u for %s\n",tintstr(),id,tintstr(next_send_time_));
 }
index 11a63a3..580a373 100644 (file)
@@ -24,6 +24,7 @@ TEST(P2TP,CwndTest) {
     struct stat st;
        ASSERT_EQ(0,stat("doc/sofi.jpg", &st));
     int size = st.st_size;//, sizek = (st.st_size>>10) + (st.st_size%1024?1:0) ;
+    Channel::SELF_CONN_OK = true;
 
     int sock1 = p2tp::Listen(7001);
        ASSERT_TRUE(sock1>=0);