From: Oana Baron Date: Mon, 6 Jun 2011 17:29:54 +0000 (+0300) Subject: Add research1 docs X-Git-Url: http://p2p-next.cs.pub.ro/gitweb/?a=commitdiff_plain;h=4dbf0750ea92eadedaeb1491ae9efefb6ee3caf0;p=swifty.git Add research1 docs --- diff --git a/doc/research1/API.txt b/doc/research1/API.txt new file mode 100644 index 0000000..d90851c --- /dev/null +++ b/doc/research1/API.txt @@ -0,0 +1,48 @@ +These are the swift structures: +// swift interface +typedef struct swift { + int socketListener, socketData; + struct sockaddr_in socketListenerAddr; +} *Swift; + +// swift_addr structure similar to in_addr +struct swift_addr { + unsigned short N; // e.g. number of s_addr + unsigned long s_addr[MAX_IPs]; // i.e. IP list +}; + +// swift struct similar to sockaddr_in +typedef struct sockSwiftaddr { + short sin_family; // e.g. AF_INET + unsigned short sin_port; // e.g. htons(3490) + struct swift_addr sin_addr; // see struct swift_addr, above +} *SockSwiftaddr; + +// list of sockaddr_in +struct listsockaddr { + unsigned short N; + struct sockaddr_in sa[MAX_IPs]; +}; + +With these new structures we modify the normal socket API, and the new API is: + +// Function to create a Swift socket +Swift socketSwift(); + +// Function to close a Swift socket +void closeSwift(Swift); + +// Function to listen to a port +int listenfromSwift (Swift s, void *buf, size_t len, int flags, + struct sockSwiftaddr * __restrict__ from, socklen_t *fromlen); + +// Function to bind a port for a swift socket +int bindSwift(Swift s, const struct sockSwiftaddr *my_addr, socklen_t addrlen); + +// Function to receive a message +ssize_t recvFromSwift(Swift s, void *buf, size_t len, int flags, + struct sockSwiftaddr *from, socklen_t *fromlen); + +// Function to send a message +ssize_t sendToSwift(Swift s, const void *buf, size_t len, int flags, + const struct sockSwiftaddr *to, socklen_t tolen); diff --git a/doc/research1/Makefile b/doc/research1/Makefile new file mode 100644 index 0000000..5c87740 --- /dev/null +++
b/doc/research1/Makefile @@ -0,0 +1,29 @@ +BASENAME = my-report +PDF = $(addsuffix .pdf, $(BASENAME)) +DVI = $(addsuffix .dvi, $(BASENAME)) +TEX = $(addsuffix .tex, $(BASENAME)) +BIB = $(addsuffix .bib, $(BASENAME)) +LATEX = latex +PDFLATEX = pdflatex +BIBTEX = bibtex + +.PHONY: all clean + +all: $(PDF) + +$(DVI): $(TEX) $(BIB) src/ + $(LATEX) $< + $(BIBTEX) $(BASENAME) + # Twice, so TOC is also updated + $(LATEX) $< + $(LATEX) $< + +$(PDF): $(TEX) $(BIB) src/ + $(PDFLATEX) $< + $(BIBTEX) $(BASENAME) + # Twice, so TOC is also updated + $(PDFLATEX) $< + $(PDFLATEX) $< + +clean: + -rm -f *~ *.aux *.log *.blg *.bbl *.out *.pdf src/*~ *.backup src/*.backup diff --git a/doc/research1/img/.gitignore b/doc/research1/img/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/doc/research1/img/AppFinal.pdf b/doc/research1/img/AppFinal.pdf new file mode 100644 index 0000000..eba161b Binary files /dev/null and b/doc/research1/img/AppFinal.pdf differ diff --git a/doc/research1/img/RD.pdf b/doc/research1/img/RD.pdf new file mode 100644 index 0000000..9152371 Binary files /dev/null and b/doc/research1/img/RD.pdf differ diff --git a/doc/research1/img/tree.png b/doc/research1/img/tree.png new file mode 100644 index 0000000..6a34747 Binary files /dev/null and b/doc/research1/img/tree.png differ diff --git a/doc/research1/img/untitled folder/AppFinal.png b/doc/research1/img/untitled folder/AppFinal.png new file mode 100644 index 0000000..7f40b45 Binary files /dev/null and b/doc/research1/img/untitled folder/AppFinal.png differ diff --git a/doc/research1/img/untitled folder/AppFinal_bak.png b/doc/research1/img/untitled folder/AppFinal_bak.png new file mode 100644 index 0000000..1042df3 Binary files /dev/null and b/doc/research1/img/untitled folder/AppFinal_bak.png differ diff --git a/doc/research1/img/untitled folder/App_Fin.svg b/doc/research1/img/untitled folder/App_Fin.svg new file mode 100644 index 0000000..9ac289b --- /dev/null +++ b/doc/research1/img/untitled 
folder/App_Fin.svg @@ -0,0 +1,405 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + + Rounded rectangle.3 + Kernel Space + + + + + + + Kernel Space + + Rounded rectangle + User Space + + + + + + + User Space + + Cloud + Internet + + + + + + + Internet + + Data store + Swift Socket (Data transfer) + + + + + + + + Swift Socket(Data transfer) + + Pointer (1-D) + + + + + + + Pointer (1-D).11 + + + + + + + Pointer (1-D).12 + + + + + + + Pointer (1-D).13 + + + + + + + Data store.14 + UDP Socket (Information update) + + + + + + + + UDP Socket(Information update) + + Pointer (1-D).19 + + + + + + + Pointer (1-D).20 + + + + + + + Pointer (1-D).21 + + + + + + + Pointer (1-D).22 + + + + + + + Pointer (1-D).27 + + + + + + + Pointer (1-D).28 + + + + + + + Pointer (1-D).29 + + + + + + + 12pt. text + … + + + + + + + + + + … + + 12pt. text.32 + … + + + + + + + + + + … + + Pointer (1-D).33 + + + + + + + 12pt. text.31 + … + + + + + + + + + + … + + Rounded rectangle.34 + Swift Library + + + + + + + Swift Library + + Rounded rectangle.23 + Application which uses SWIFT Library + + + + + + + Application which uses SWIFT Library + + Pointer (1-D).35 + + + + + + + Rounded rectangle.36 + Swift Transport Layer + + + + + + + Swift Transport Layer + + Rounded rectangle.37 + IP 1 + + + + + + + IP 1 + + Rounded rectangle.38 + Physical Layer 1 + + + + + + + Physical Layer 1 + + Rounded rectangle.39 + Data Link 1 + + + + + + + Data Link 1 + + Rounded rectangle.7 + Data Link N + + + + + + + Data Link N + + Rounded rectangle.8 + IP N + + + + + + + IP N + + Rounded rectangle.9 + Physical Layer 1 + + + + + + + Physical Layer 1 + + Rounded rectangle.5 + UDP Transport Layer + + + + + + + UDP Transport Layer + + Rounded rectangle.15 + Data Link + + + + + + + Data Link + + Rounded rectangle.24 + IP + + + + + + + IP + + Rounded rectangle.16 + Physical Layer + + + + + + + Physical Layer + + diff --git a/doc/research1/img/untitled folder/RD.png b/doc/research1/img/untitled folder/RD.png 
new file mode 100644 index 0000000..8b7a54c Binary files /dev/null and b/doc/research1/img/untitled folder/RD.png differ diff --git a/doc/research1/img/untitled folder/RD_.pdf b/doc/research1/img/untitled folder/RD_.pdf new file mode 100644 index 0000000..696d573 Binary files /dev/null and b/doc/research1/img/untitled folder/RD_.pdf differ diff --git a/doc/research1/img/untitled folder/RD_.svg b/doc/research1/img/untitled folder/RD_.svg new file mode 100644 index 0000000..2a076eb --- /dev/null +++ b/doc/research1/img/untitled folder/RD_.svg @@ -0,0 +1,212 @@ + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + + Function / subroutine + Swift Application + + + + + + + Swift Application + + Function / subroutine.2 + Swift Library + + + + + + + Swift Library + + Invocation + + + + + + + + Invocation.4 + + + + + + + + Function / subroutine.5 + Kernel Space + + + + + + + Kernel Space + + Function / subroutine.6 + Kernel Space 1 + + + + + + + Kernel Space 1 + + Invocation.7 + + + + + + + + Invocation.8 + + + + + + + + Function / subroutine.9 + Kernel Space N + + + + + + + Kernel Space N + + Invocation.13 + + + + + + + + 12pt. text + Request + + + + + + + + + + Request + + 12pt. text.16 + SysCall + + + + + + + + + + SysCall + + 12pt. text.17 + ……….. + + + + + + + + + + ……….. + + Invocation.18 + + + + + + + + Invocation.19 + + + + + + + + 12pt. 
text.20 + Network communication + + + + + + + + + + Network communication + + Invocation.10 + + + + + + + + diff --git a/doc/research1/my-report.bib b/doc/research1/my-report.bib new file mode 100644 index 0000000..728f1ee --- /dev/null +++ b/doc/research1/my-report.bib @@ -0,0 +1,81 @@ +# sample entry +@inproceedings{large-scale-p2p-sim, + author = {Dinh, Tien Tuan Anh and Theodoropoulos, Georgios and Minson, Rob}, + title = {{Evaluating Large Scale Distributed Simulation of P2P Networks}}, + booktitle = {DS-RT '08: Proceedings of the 2008 12th IEEE/ACM International Symposium on Distributed Simulation and Real-Time Applications}, + year = {2008}, + isbn = {978-0-7695-3425-1}, + pages = {51--58}, + doi = {http://dx.doi.org/10.1109/DS-RT.2008.36}, + publisher = {IEEE Computer Society}, + address = {Washington, DC, USA}, +} + +@misc{binmaps, + author = {V. Grishchenko, J. Pouwelse}, + title = {{Binmaps: hybridizing bitmaps and binary trees}}, + year = {2009}, + howpublished = {http://bouillon.math.usu.ru/articles/binmaps-alenex.pdf}, +} + +@misc{merkle, + author = {R. Merkle}, + title = {{A Digital Signature Based on a Conventional Encryption Function}}, + booktitle = {Proceedings CRYPTO'87}, + year = {1987}, + pages = {369--378}, + address = {Santa Barbara, CA, USA}, +} + +@misc{bittorrent, + author = {Bram Cohen}, + title = {{Incentives Build Robustness in BitTorrent}}, + howpublished = {http://www.bittorrent.org/bittorrentecon.pdf}, +} + +@misc{merkle-ext, + author = {Arno Bakker}, + title = {{Merkle hash torrent extension}}, + booktitle = {BEP 30}, + howpublished = {http://bittorrent.org/beps/bep\_0030.html}, +} + +@misc{ledbat, + author = {S. Shalunov}, + title = {{Low Extra Delay Background Transport (LEDBAT)}}, + howpublished = {http://www.ietf.org/id/draft-ietf-ledbat-congestion-00.txt}, + year = {2009}, +} + +@misc{ledbat2, + author = {S. Shalunov, G. 
Hazel}, + title = {{Low Extra Delay Background Transport (LEDBAT)}}, + howpublished = {http://tools.ietf.org/html/draft-ietf-ledbat-congestion-03}, + year = {2010}, +} + + +@misc{peer-to-peer, + author = {B. Ford, P. Srisuresh, D. Kegel}, + title = {{Peer-to-Peer Communication Across Network Address Translators}}, + howpublished = {http://www.brynosaurus.com/pub/net/p2pnat/}, +} + +@misc{cdnwiki, + title = {{Wikipedia Content Delivery Network(CDN) page}}, + howpublished = {http://en.wikipedia.org/wiki/Content_delivery_network}, +} + +@misc{kernel, + author = {R. Love}, + title = {{Linux Kernel Development}}, + booktitle = {{Talking Directly to the Kernel and C Library}}, + year = {2004}, +} + +@misc{socket, + author = {R. Stevens}, + title = {{UNIX Network Programming, Volume 2, Second Edition}}, + booktitle = {{Interprocess Communications}}, + year = {1999}, +} \ No newline at end of file diff --git a/doc/research1/my-report.tex b/doc/research1/my-report.tex new file mode 100644 index 0000000..d8889ed --- /dev/null +++ b/doc/research1/my-report.tex @@ -0,0 +1,110 @@ +% vim: set tw=78 sts=2 sw=2 ts=8 aw et ai: +\documentclass[12pt]{article} + +\usepackage[paper=a4paper, top=2cm, bottom=3cm, left=2.5cm, right=2.5cm]{geometry} + +\usepackage{ucs} +\usepackage[utf8x]{inputenc} +\usepackage[english]{babel} +\usepackage{hyperref} % use \url{http://$URL} or \href{http://$URL}{Name} +\usepackage{underscore} % underscores need not be escaped +\usepackage{subfigure} +\usepackage{verbatim} +\usepackage{float} +\usepackage{booktabs} + +% Support for including graphics +\usepackage{graphicx} +\DeclareGraphicsExtensions{.pdf,.png,.jpg} + +\usepackage{hyperref} + +\hypersetup{% + colorlinks=true, + linkcolor=blue, + anchorcolor=black, + citecolor=black, + urlcolor=blue, + bookmarks=true, + bookmarksnumbered=true +} +\urlstyle{same} + +\newcommand{\labelindexref}[2]{\hyperref[#2]{#1~\ref*{#2}}} +% command for inserting labeled figures +\newcommand{\image}[4][]{ 
+\begin{figure}[htb] +\begin{center} +\includegraphics[#1]{#2} +\caption{#4 \label{#3}} +\end{center} +\end{figure} +} + +\setlength{\parindent}{0pt} +\setlength{\parskip}{2ex} + + +\title{\bfseries{SWIFT in the Linux Kernel\\ +\large{\vspace*{0.4cm}Scientific supervisor: Nicolae Țăpuș\\ Technical supervisor: Răzvan Deaconescu}}} + +\author{Oana Baron, Bogdan Druțu\\ +Automatic Control and Computers Faculty\\ +University Politehnica of Bucharest\\ +Splaiul Independenței nr. 313, Bucharest, Romania \\ +\emph{\{oana.baron, bogdan.drutu\}@cti.pub.ro}} + +\date{\today} + +\begin{document} + +\maketitle + +\begin{abstract} +\input{src/abstract} +\end{abstract} + +{\bf \hspace*{0.8cm} \textbf{\emph{Keywords}} -- BitTorrent, multiparty, transport protocol, Linux kernel,\\ +\hspace*{0.8cm} Merkle hash trees, binmaps} + +\section{Introduction} +\label{sec:introduction} +\input{src/intro} + +\section{Related Work} +\label{sec:relatedwork} +\input{src/relatedwork} + +%\pagebreak + +\section{Swift Protocol Description} +\label{sec:swift} +\input{src/swiftdescrip} + +\section{Implementation} +\label{sec:implementation} +\input{src/implement} + +\section{Utilization Example} +\label{sec:example} +\input{src/example} + +\section{Preliminary Results} +\label{sec:results} +\input{src/results} + +\section{Conclusion and Further Work} +\label{sec:summary} +\input{src/summary} + +%\section*{Acknowledgment} +%\label{sec:acknowledgment} + +%The authors would like to thank XYZ for their support and dedication. + +\pagebreak + +\bibliographystyle{abbrv} +\bibliography{my-report} + +\end{document} diff --git a/doc/research1/src/abstract.tex b/doc/research1/src/abstract.tex new file mode 100644 index 0000000..e790bb9 --- /dev/null +++ b/doc/research1/src/abstract.tex @@ -0,0 +1,9 @@ +%\begin{abstract} +{\bf +BitTorrent is the protocol responsible for the greatest chunk of traffic on the Internet.
A new approach, similar to +BitTorrent, is swift – the multiparty (swarming) transport protocol. Swift may be understood as BitTorrent at the +transport layer. Ultimately, swift aims at the abstraction of the Internet as a single big data cloud. This paper +proposes an approach to integrating swift into the kernel as a transport protocol in the Linux kernel networking +stack.} + +%\end{abstract} diff --git a/doc/research1/src/example.tex b/doc/research1/src/example.tex new file mode 100644 index 0000000..d7d2e52 --- /dev/null +++ b/doc/research1/src/example.tex @@ -0,0 +1,43 @@ + + +Here is an example of how to use the swift library: + +\textbf{Seeder example} +\begin{itemize} + \item The first step is to create a socket:\\\\ +\small{\emph{ +\hspace*{1cm} Swift s = socketSwift(); +}} + \item The second step is to bind the socket: \\\\ +\small{\emph{ +\hspace*{1cm} SockSwiftaddr ssa = (SockSwiftaddr) calloc (1, sizeof(struct sockSwiftaddr));\\ +\hspace*{1cm} ssa-$>$sin_port = 9000;\\ +\hspace*{1cm} bindSwift(s, ssa, sizeof(*ssa));\\ +}} + \item The third step is to listen on the socket.
The difference from a normal socket listen is that after listening we will +have the request data, so we don’t need an accept step:\\\\ +\small{\emph{ +\hspace*{1cm} SockSwiftaddr ret = (SockSwiftaddr) calloc (1, sizeof(struct sockSwiftaddr));\\ +\hspace*{1cm} char * buf = (char*) calloc(MAXSIZE, sizeof(char));\\ +\hspace*{1cm} while (1) \{\\ +\hspace*{1.7cm} size_t len = listenfromSwift(s, buf, MAXSIZE, 0, ret, sizeof(*ret));\\ +\hspace*{1.7cm} sendToSwift (s, buf, len, 0, ret, sizeof(*ret));\\ +\hspace*{1cm} \} +}} +\end{itemize} + +\textbf{Peer example} +\begin{itemize} + \item The first step is to create a socket:\\\\ +\small{\emph{ +\hspace*{1cm} Swift s = socketSwift(); +}} + \item The second step is to make a request for receiving data:\\\\ +\small{\emph{ +\hspace*{1cm} SockSwiftaddr from = (SockSwiftaddr) calloc (1, sizeof(struct sockSwiftaddr));\\ +\hspace*{1cm} from-$>$sin_addr.N = 1;\\ +\hspace*{1cm} from-$>$sin_addr.s_addr[0] = (192$<<$24) + (168$<<$16) + (1$<<$8) + 101;\\ +\hspace*{1cm} char * buf = (char*) calloc(MAXSIZE, sizeof(char));\\ +\hspace*{1cm} size_t len = recvFromSwift (s, buf, MAXSIZE, 0, from, sizeof(*from));\\ +}} +\end{itemize} diff --git a/doc/research1/src/implement.tex b/doc/research1/src/implement.tex new file mode 100644 index 0000000..d643c62 --- /dev/null +++ b/doc/research1/src/implement.tex @@ -0,0 +1,111 @@ +%\section{\fontfamily{phv}\selectfont{\large{\bfseries{IMPLEMENTATION}}}} + +\subsection{Motivation} + +Current Internet protocols are geared for 1:1 client/server communication. \emph{Swift} expands the TCP/IP protocol +suite with swarming. It is designed to be capable of integration into browsers or operating systems and is +able to serve 95\% of current Internet traffic. + +The Linux kernel is an operating system kernel used by the Linux family of Unix-like operating systems. It is one of +the most prominent examples of free and open source software.
We chose this operating system because of the freedom +to develop with open source rather than depending on a closed environment. This way we have direct access to the kernel +code to modify it for our specific purpose. Another bonus is the open syntax describing all aspects regarding the +Linux kernel internals. + +We are motivated to implement the \emph{swift} protocol as part of the transport layer, at OSI level 4, because of +the benefits that will be gained by reducing the number of system calls made from user space into the kernel and +the number of preemption moments. + +\subsection{Design Goals} + +We propose an integration of the \emph{swift} protocol as a transport protocol in the Linux kernel networking stack to +ensure maximum efficiency of data transfer. + +Chief design goals include easy integration in the Linux kernel and minimal disruption to the current \emph{swift} +implementation (libswift). + +The idea is to implement only transport-related components in kernel space. This leaves application layer +protocols free to run over the \emph{swift} multiparty transport. The kernel transport layer implementation is only concerned with +getting the “bytes going”. Peer discovery and piece information will remain implemented in user +space applications. + +Another goal is to maintain the transparency of the communication between user and kernel-space. To ensure this, a +socket-like interface will be provided to the application designer. The user-space does not need to be aware of the +kernel implementation. From the user's perspective, how the data is transferred is not important. + +\subsection{Architecture} + +\labelindexref{Figure}{img:arch} shows a generic view of the application and the communication between user and kernel +space. + +\image[scale=0.45]{img/AppFinal}{img:arch}{Architecture - High Level View} + +The \emph{swift} transport layer is a new kernel interface allowing the creation of specialized \emph{swift} sockets.
+It implements the multiparty protocol allowing piece transport to/from other hosts in a peer-to-peer fashion. + +There are specialized “request queues”, metadata queues, to/from user space. The receiver request queue stores the user +space demands for the acquisition of pieces of data, while the sender request queue stores the kernel space demands. These +demands represent requests to the user space for delivery of pieces of data on behalf of other peers. + +\emph{Swift} specialized “data queues” (receiver/sender), similar to TCP buffers, allow delivery of data to/from user +space. Pieces are identified through Merkle hashes. + +A specialized system call API allows user space applications to interact with the above-mentioned queues and, thus, with +the multiparty transport protocol implementation. + +Innate differences from a classical one-to-one communication such as UDP or TCP mean the system call API doesn't +follow the classical send/receive paradigm. In order to compensate for this and to provide a rather “friendly” interface to +user space applications, a library is designed that will provide a simpler interface (SWIFT Library in the above image). + +Peer and piece discovery are the responsibility of the user space application. The SWIFT Library may also provide +wrappers over a UDP-based channel for discovery. + +Merkle hashes are stored and computed in user space. It is the responsibility and freedom of the application to decide +piece priority acquisition or treatment of other peers. + +\image[scale=0.4]{img/RD}{img:rd}{Architecture - Workflow} + +\labelindexref{Figure}{img:rd} describes the “receive/send data” process. The request is initiated from the user space +application, which uses the \emph{swift} library API to communicate with the kernel space. This request will be added to +the +specialized “request queue”. Each of these requests will generate a system call.
The kernel space implementation is +solely responsible for providing a clean multiparty transport capability allowing maximum efficiency for peer-to-peer +like application protocols. Basically, every request will be sent to every peer application that has the specific +pieces requested. So it is possible to have multiple responses, but only the first one will be accepted and the others +will be dropped. + +\subsection{Challenges} + +The first approach we thought of was to include the entire \emph{swift} protocol in the kernel space. This approach could +not be implemented because of the restriction of the memory size in the kernel space. For the integrity check the +\emph{swift} protocol relies on a Merkle hash tree. Keeping this tree in the kernel space memory is not scalable. The +Internet content is too large to be stored in the kernel even if the tree retains only hashes of the data disseminated. + +After this we concluded that the kernel space implementation will only be used for multiparty transport. Other +decisions/chores are left for the user space application. For example, the UDP channel is a control channel that will +remain in user space and may be understood as an overlay for updating peer/piece information. All the information +regarding peers and data hashes is also stored in the user space. + +Another issue is represented by the communication between user and kernel space. There are three main lines of +communication with the Linux kernel. The first one consists of char devices that would require creation of a specialized +device. This approach would go against "this is a network protocol and provides a socket-like API". The second way to +communicate between kernel and user space is with netlink sockets. Sockets of this type are non-standard, Linux-only +sockets, and are mostly used for specialized kernel subsystems (such as routing table management). The last one is +represented by the classical communication - using system calls.
This is our choice as we plan to update the +socket/network system call API (\texttt{sys\_socket}, \texttt{sys\_bind}, \texttt{sys\_sendmsg} etc.) with an +implementation of multiparty sockets - \emph{swift} sockets. + +The main challenge was how to modify the classical network system call, which was designed for one-to-one +communication, to support multiparty communication. In our approach, from the user's point of view, a single socket +exists and, with our library implementation, the user can access this socket in a classical manner. However, in reality there +are many sockets behind it. A \emph{swift} application must be able to be both sender and receiver. For this reason +there will be one socket that will listen for new data requests -- for the seeder part, a socket which carries the data +that needs to be transferred and a socket on which the requested data is received. + +The main steps in starting up an application that uses the \emph{swift} multiparty protocol are detailed in this paragraph. +At first, regardless of what role the application will take - seeder or peer, there should be a call to the +\texttt{socketSwift()} function. This will create the actual sockets. Next, for the seeder part of the application, +there should be a \texttt{bindSwift()} call and a loop with the \texttt{listenfromSwift()} call, waiting for data +requests. The seeder can respond to client requests by issuing \texttt{sendToSwift()} with the appropriate information. +A peer, on the other hand, should call \texttt{recvFromSwift()} in a loop to receive all the necessary data +from the seeders. \ No newline at end of file diff --git a/doc/research1/src/intro.tex b/doc/research1/src/intro.tex new file mode 100644 index 0000000..b927ec5 --- /dev/null +++ b/doc/research1/src/intro.tex @@ -0,0 +1,30 @@ +%\section{\fontfamily{phv}\selectfont{\large{\bfseries{INTRODUCTION}}}} + +The \emph{swift} protocol is a generic multiparty transport protocol.
Its mission is to disseminate content among a +swarm of peers. Basically, it answers one and only one request: \emph{'Here is a hash! Give me data for it!'}. Such +entities as storage, servers and connections are abstracted and are virtually invisible at the API layer. Given a hash, +the data is received from whatever source is available and data integrity is checked cryptographically with Merkle hash +trees. + +If you need some data it is somewhat faster and/or cheaper to download it from a nearby well-provisioned replica, but +on the other hand, this process requires that multiple parties (e.g. consumers, the data sources, CDN +sites\cite{cdnwiki}, mirrors, peers) be coordinated. As the Internet content is continuously increasing +nowadays, the overhead of peer/replica coordination becomes higher than the mass of the download itself. Thus, the niche +for multiparty transfers expands. Still, current, relevant technologies are tightly coupled to a single use case or even +infrastructure of a particular corporation. These are the reasons for the appearance of the \emph{swift} protocol, whose +primary goal is to act as a generic content-centric multiparty transport protocol that allows seamless, effortless data +dissemination on the big cloud represented by the Internet. + +\textbf{Contribution}. Our main objective is to integrate \emph{swift} as a transport protocol in the Linux kernel +networking stack. This will provide a notable performance improvement regarding data transfer. We intend to do this with +minimal intrusion into the Linux kernel and also to change as little as possible the current \emph{swift} +implementation. Another goal is to provide a transparent API between the kernel and the user space. A developer will use +a socket-like interface when building an application on top of the \emph{swift} protocol. + +\textbf{Outline}. The rest of the paper is organized as follows.
In section \ref{sec:relatedwork} we discuss related +work, regarding the \emph{swift} protocol, transport layer protocols, BitTorrent and LEDBAT. In section \ref{sec:swift} +we describe the existing \emph{swift} protocol design choices. Section \ref{sec:implementation} presents our approach to +integrating the \emph{swift} protocol as a transport layer protocol into the Linux kernel. We discuss our motivation +and goals and the architectural challenges that we have encountered. Section \ref{sec:example} gives a usage example, +which highlights the basic steps for working with the API. In section \ref{sec:results} we describe the +theoretical results of our proposal. Section \ref{sec:summary} concludes the article and refers to future work. diff --git a/doc/research1/src/relatedwork.tex b/doc/research1/src/relatedwork.tex new file mode 100644 index 0000000..2249827 --- /dev/null +++ b/doc/research1/src/relatedwork.tex @@ -0,0 +1,31 @@ +%\section{\fontfamily{phv}\selectfont{\large{\bfseries{PREVIOUS RELATED WORK}}}} + +The \emph{swift} protocol is currently implemented in user-space on top of UDP, the thinnest wrapper over IP. It +entirely drops TCP's abstraction of sequential reliable data stream delivery - for \emph{swift} this is redundant. For +example, out-of-order data could still be saved and the same piece of data might always be received from another peer. +Being implemented over UDP, the protocol does its best to make every datagram self-contained. Due to the fact that the +Internet is mostly used for disseminating content, \emph{swift} aims at creating a single unified content-centric +transport protocol serving as a bridge language of content distribution. To implement that ultimate data cloud model, +the protocol has to unify use cases of data download, video-on-demand and live streaming. It must work in the settings +of client-server, peer-to-peer\cite{peer-to-peer}, CDN or peer-assisted networks, effectively blending those +architectures.
+ + +BitTorrent is also a peer-to-peer file sharing protocol used for distributing large amounts of data \cite{bittorrent}. +This protocol can distribute large files without large bandwidth consumption or CPU power. Rather than downloading a +file from a single source, the BitTorrent protocol allows users to join a "swarm" of hosts to download and upload from +each other simultaneously. Every file is divided into pieces. Just like in the \emph{swift} protocol, when a host has fully +received a piece, it can distribute it to other hosts. This way, the task of distributing a file is supported by all who +need that file. + +In BitTorrent, every piece has a cryptographic hash attached in the torrent descriptor. When an entire piece is +received, the host recalculates the hash in order to verify the authenticity of the actual piece. The integrity of the +data is checked in the same manner for the \emph{swift} protocol as well, but in this case the hash is sent along with the +useful data. + +LEDBAT\cite{ledbat}, \cite{ledbat2} is an experimental delay-based congestion control algorithm. Its main goal is to +utilize the available bandwidth on an end-to-end path while limiting the consequent increase in queueing delay on the +path. LEDBAT uses changes in one-way delay measurements to limit congestion induced in the network by the LEDBAT flow. +LEDBAT is designed largely for use by background bulk-transfer applications. It is designed to be no more aggressive +than TCP congestion control and yields in the presence of competing TCP flows, thus reducing interference with the +network performance of the competing flows. The \emph{swift} protocol relies on LEDBAT for congestion control support.
diff --git a/doc/research1/src/results.tex b/doc/research1/src/results.tex new file mode 100644 index 0000000..e7122f5 --- /dev/null +++ b/doc/research1/src/results.tex @@ -0,0 +1,15 @@ +%\section{\fontfamily{phv}\selectfont{\large{\bfseries{PRELIMINARY RESULTS}}}} + +Our main focus when modifying the \emph{swift} implementation is to have an impact on time performance. With a +communication +protocol the greatest latency is usually generated by waiting for the results from the network. The multiparty +communication model already takes care of this, so the next best thing is to improve the application's execution time. We are doing +this by decreasing the time penalties due to context switches between user space and kernel space. The main idea is to +reduce the number of system calls made from user space into the kernel. This implicitly reduces the number of +preemption moments. + +Another benefit is achieved by providing a user-friendly API, through our library, that would simplify the +communication between user and kernel space. The common implementation of sockets provides support for one-to-one +communication, while \emph{swift} needs multiparty communication, from one peer to many peers. This type of socket +implementation will be achieved with our \emph{swift} sockets. The library will ensure transparency and a level of +abstraction for this communication. \ No newline at end of file diff --git a/doc/research1/src/summary.tex b/doc/research1/src/summary.tex new file mode 100644 index 0000000..e8950dd --- /dev/null +++ b/doc/research1/src/summary.tex @@ -0,0 +1,21 @@ +%\section{\fontfamily{phv}\selectfont{\large{\bfseries{SUMMARY}}}} + +The \emph{swift} protocol is a multiparty content-centric protocol that aims to disseminate content among a swarm of +peers. This paper proposes an approach for the optimization of the current \emph{swift} protocol.
The integration of +the communication in the kernel space as a multiparty transport protocol that is solely responsible for getting the +bits moving improves the overall protocol performance. It ensures maximum efficiency of data transfer by decreasing +switches between user and kernel space and eliminating some performance penalties due to context switches. + +\subsection*{Directions for Future Work} + + +After we complete the implementation and the functional tests, we want to extensively test our new features in a real +environment. We plan to do stress tests using a cluster. These tests will give us an overview of our +implementation, and we will be able to compare it with the user-space implementation of \emph{swift} to determine exactly what +performance we achieved. If the results are satisfactory, we will continue to optimize our program and we will add +new features. + +It will be very useful to have a real application on top of the \emph{swift} protocol. If not, one solution would be to +port an application strictly for this task. This would give us the opportunity to extend and refine our implementation, +and also to extend the library API. + diff --git a/doc/research1/src/swiftdescrip.tex b/doc/research1/src/swiftdescrip.tex new file mode 100644 index 0000000..1188648 --- /dev/null +++ b/doc/research1/src/swiftdescrip.tex @@ -0,0 +1,81 @@ +%\section{\fontfamily{phv}\selectfont{\large{\bfseries{SWIFT PROTOCOL DESCRIPTION}}}} + +Most features of the \emph{swift} protocol are defined by its function as a content-centric multiparty transport +protocol. A significant difference between \emph{swift} and the TCP protocol is that TCP possesses no information +regarding what data it is dealing with, as the data is passed from the user-space, while the \emph{swift} protocol has +data fixed in advance and many peers participate in distributing the same data.
Because of this and the fact that for +\emph{swift} the order of delivery is of little importance and unreliability is naturally compensated for by redundancy, +it entirely drops TCP's abstraction of sequential reliable data stream delivery. For example, out-of-order data could +still be saved and the same piece of data might always be received from another peer. + +Being implemented over UDP, the protocol does its best to make every datagram self-contained so that each datagram can be +processed separately and the loss of one datagram does not disrupt the flow. Thus, a datagram carries zero or more +messages, and neither messages nor message interdependencies should span over multiple datagrams. + +The verification of data pieces is realized using Merkle hash trees~\cite{merkle,merkle-ext}. That means that all +hashes necessary for verifying data integrity need to be put into the same datagram as the data. For both use cases, +streaming and downloading, a unified integrity checking scheme that works down to the level of a single datagram is +developed. As a general rule, the sender should append to the data some meta-data represented by the necessary hashes +for the data verification. While some optimistic optimizations are definitely possible, the receiver should drop data if +it is impossible to verify it. Before sending a packet of data to the receiver, the sender inspects the receiver's +previous acknowledgments to derive which hashes the receiver already has for sure. + +The data is acknowledged in terms of binary intervals, with the base interval of 1~KB ``packet''. As a result, every +single packet is acknowledged a logarithmic number of times. This mechanism provides some necessary redundancy of the +acknowledgements and sufficiently compensates for the unreliability of the datagrams. + +The only function of TCP that is also critical for \emph{swift} is the congestion control. 
To facilitate delay-based +congestion control, an acknowledgment contains, besides the dimension of the file received from its addressee, a timestamp. + +Binary interval numbering is done in ascending order of each interval's ``center'', namely: + + +%\hspace*{3.75cm} 7 +% +%\vspace*{-0.3cm} +%\hspace*{2.7cm} 3 \hspace*{1.65cm} 11 +% +%\vspace*{-0.22cm} +%\hspace*{2.2cm} 1 \hspace*{0.6cm} 5 \hspace*{0.7cm} 9 \hspace*{0.8cm} 13 +% +%\vspace*{-0.17cm} +%\hspace*{1.85cm} 0 \hspace*{0.2cm} 2 \hspace*{0.2cm} 4 \hspace*{0.1cm} 6 \hspace*{0.2cm} 8 \hspace*{0.1cm} 10 +%\hspace*{0.1cm} 12 \hspace*{0.1cm} 14 + +\image[scale=0.28]{img/tree}{img:tree}{Binary interval tree} + +Suppose the receiver has acknowledged the first binary interval; then it must already have uncle hashes 5, 11 and so on. +That is because those hashes are necessary to check the packets of the first two kilobytes acknowledged against the +root hash. Then, hashes 3, 7 and so on must also be known, as they are calculated in the process of checking the uncle +hash chain. Hence, to send binary interval 12, which represents the 7th kilobyte of data, the sender needs to +prepend hashes for binary intervals 14 and 9. These are the only hashes needed to check the data against hash 11, which is +already known to the receiver. + +The sender may optimistically skip hashes which were sent out in previous (still unacknowledged) datagrams. It is an +optimization trade-off between redundant hash transmission and the possibility of collateral data loss in case some +necessary hashes were lost in the network, so that some delivered data cannot be verified and thus has to be dropped. In +either case, the receiver builds the Merkle tree on-demand, incrementally, starting from the root hash, and uses it for +data validation. + +The concept of peak hashes enables two cornerstone features of \emph{swift}: download and streaming unification and +file size proving. 
Formally, peak hashes are hashes defined over filled binary intervals, whose parent hashes are +defined over incomplete, not filled, binary intervals. A filled binary interval is a binary interval which does not +extend past the end of the file, or, more precisely, contains no empty packets. Practically, we use peaks to cover the +data range with a logarithmic number of hashes, so each hash is defined over a ``round'' aligned $2^k$ interval. + +The classical problem of keeping huge bitmaps predominantly consisting of long ranges of zeros and ones is most often +encountered in file systems (free space tracking) and network protocols (transmission progress tracking). For this +problem three common solutions are available: plain bitmaps, extent lists and extent binary trees. Bitmaps are simple +but have high fixed space requirements. Lists are able to aggregate solid ranges, but they don't scale well with regard +to search. Extent binary trees are capable of aggregation and allow scalable search, but have high overhead and extremely bad +worst-case behavior, potentially exploding to sizes a couple of orders of magnitude higher than plain bitmaps. The latter +problem is sometimes resolved by ad-hoc means, e.g. by converting parts of an extent tree to bitmaps. Another possible +workaround is to impose a divide-and-conquer multilayered unit system (BitTorrent \cite{bittorrent}). + +The \emph{swift} solution is a new data structure named ``binmap''~\cite{binmaps}, a hybrid of bitmap and binary tree, which +resolves the shortcomings of the extent binary tree approach. Namely, it has lower average-case overhead and, as it is +tolerant of patchy bitmaps, its worst-case behavior is dramatically better. 
+ +\input{src/tab2} + +\input{src/tab1} \ No newline at end of file diff --git a/doc/research1/src/tab1.tex b/doc/research1/src/tab1.tex new file mode 100644 index 0000000..0606e9e --- /dev/null +++ b/doc/research1/src/tab1.tex @@ -0,0 +1,39 @@ + +The second table highlights the main differences between three transport protocols: UDP, TCP and \emph{swift}. In +a perfect network UDP is the fastest way to transfer data, but in a real network problems arise because of corrupt +packets or failed transfers. TCP ensures correct transmission and because of this it is slower than UDP. \emph{Swift} +tries to make a compromise between UDP and TCP. \emph{Swift} is a connection-less protocol, but checks the validity of +the received data. Also, transfers are made among multiple seeders, not just between a server and a host as with TCP. +Thus, in a real network, \emph{Swift} is more secure than UDP and faster than TCP. + + +\begin{center} + \begin{tabular}{|c|c|c|c|} + \toprule + Properties & \multicolumn{3}{|c|}{Protocols} \\ + \cmidrule(r){2-4} + & Swift & UDP & TCP \\ + \midrule + Sockets & 1 & 1 & 1 \\ + \midrule + Listening Ports & 1 & 1 & 1 \\ + \midrule + Packets received for sending data & 1 & 1 & 1 \\ + \midrule + Packets sent for sending data & 1 & 1 & $Nr_{fails} + 1$ \\ + \midrule + Packets sent for getting data & N & 1 & $Nr_{fails} + 1$ \\ + \midrule + Packets received for getting data & N & 1 & $Nr_{fails} + 1$ \\ + \midrule + Useful packets when getting data & 1 & 1 & 1 \\ + \midrule + Recall possibility when getting data & $\approx\frac{100}{2^{N}}\%$ & $75\%$ & $0\%$ \\ + \midrule + Usability when getting data & $\frac{1}{N}\%$ & $100\%$ & $100\%$ \\ + \midrule + Bandwidth used & high usage & low usage & medium usage \\ + \bottomrule + \end{tabular} +\end{center} + diff --git a/doc/research1/src/tab2.tex b/doc/research1/src/tab2.tex new file mode 100644 index 0000000..d53a06f --- /dev/null +++ b/doc/research1/src/tab2.tex @@ -0,0 +1,40 @@ +In the next table we 
will discuss a comparison between the \emph{swift} application and BitTorrent. We take into +consideration the most important features that we have in \emph{swift} and BitTorrent. + +\begin{center} + \begin{tabular}{|c|c|c|} + \toprule + Properties & \multicolumn{2}{|c|}{Protocols} \\ + \cmidrule(r){2-3} + & Swift & BitTorrent \\ + \midrule + Listening Ports & 1 & 1 \\ + \midrule + Sockets created for getting M pieces & 1 & M \\ + \midrule + Packets received for sending 1 piece & 1 & 1 \\ + \midrule + Packets sent for sending 1 piece & 1 & $Nr_{fails} + 1$\\ + \midrule + Packets sent for getting 1 piece & N & $Nr_{fails} + 1$ \\ + \midrule + Packets received for getting 1 piece & N & $Nr_{fails} + 1$ \\ + \midrule + Useful packets when getting 1 piece & 1 & 1 \\ + \midrule + Recall possibility & $\approx\frac{100}{2^{N}}\%$ & $0\%$ \\ + \midrule + Time to get M pieces & $M * T_{fastest}$ & $T_{slowest}$ \\ + \midrule + Bandwidth used & high usage & high usage \\ + \bottomrule + \end{tabular} +\end{center} + +This comparison table highlights the advantages of \emph{swift} over BitTorrent. \emph{Swift} uses a socket per file, +whereas BitTorrent opens as many as M sockets. This is particularly useful for big files. Also, if BitTorrent connects +to a slower seeder the waiting time will be longer compared to \emph{swift}. + +In the case of transferring many small files, BitTorrent will be better than \emph{swift}. Another case when +\emph{swift} performs worse than BitTorrent is for the first transfer between a peer and a seeder. Both of them use the same +bandwidth, but the overall performance appears to be better for \emph{swift} on the Internet.