\documentclass[journal]{IEEEtran}
\renewcommand{\baselinestretch}{1.4}
\newtheorem{thm}{Theorem}
\newtheorem{defin}{Definition}
\newtheorem{examp}{Example}
\usepackage{epsfig}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{pst-all}
\include{math_def}
\newcommand{\Ei}{\mathop{\mbox{\rm Ei}}}
%\newcommand{\det}{\mathop{\mbox{\rm det}}}
\begin{document}
%
% paper title
\title{A Comprehensive Stochastic Model for TCP Latency and
Throughput}
%
%
% author names and IEEE memberships
% note positions of commas and nonbreaking spaces ( ~ ) LaTeX will not break
% a structure at a ~ so this keeps an author's name from being broken across
% two lines.
% use \thanks{} to gain access to the first footnote area
% a separate \thanks must be used for each paragraph as LaTeX2e's \thanks
% was not built to handle multiple paragraphs
\author{Dong~Zheng,
Georgios~Y.~Lazarou,
Rose~Hu,
and~Junshan~Zhang%
\thanks{Parts of this paper were presented at ICC'03, \copyright IEEE 2003,
and CIIT'03, \copyright IASTED 2003}%
\thanks{Dong Zheng and Junshan Zhang are with the Department of
Electrical Engineering, Fulton School of Engineering, Arizona
State University (\{dong.zheng, junshan.zhang\}@asu.edu).}%
\thanks{Georgios Y. Lazarou and Rose Hu are with the
Department of Electrical and Computer Engineering, Mississippi
State University (\{glaz, hu\}@ece.msstate.edu).}%
}
% note the % following the last \IEEEmembership and also the first \thanks -
% these prevent an unwanted space from occurring between the last author name
% and the end of the author line. i.e., if you had this:
%
% \author{....lastname \thanks{...} \thanks{...} }
% ^------------^------------^----Do not want these spaces!
%
% a space would be appended to the last name and could cause every name on that
% line to be shifted left slightly. This is one of those "LaTeX things". For
% instance, "A\textbf{} \textbf{}B" will typeset as "A B" not "AB". If you want
% "AB" then you have to do: "A\textbf{}\textbf{}B"
% \thanks is no different in this regard, so shield the last } of each \thanks
% that ends a line with a % and do not let a space in before the next \thanks.
% Spaces after \IEEEmembership other than the last one are OK (and needed) as
% you are supposed to have spaces between the names. For what it is worth,
% this is a minor point as most people would not even notice if the said evil
% space somehow managed to creep in.
%
% The paper headers
% The only time the second header will appear is for the odd numbered pages
% after the title page when using the twoside option.
%
% *** Note that you probably will NOT want to include the author's name in ***
% *** the headers of peer review papers. ***
% If you want to put a publisher's ID mark on the page
% (can leave text blank if you just want to see how the
% text height on the first page will be reduced by IEEE)
%\pubid{0000--0000/00\$00.00~\copyright~2002 IEEE}
% use only for invited papers
%\specialpapernotice{(Invited Paper)}
% make the title area
\maketitle
\begin{abstract}
Understanding the nature of TCP behavior is critical in order to
properly engineer, operate, and evaluate the performance
of the Internet, as well as to properly design and implement
future networks. In this paper, we first develop a better and tractable model for the
congestion window growth pattern in the slow-start phase.
Using this new slow-start phase model, we construct an accurate model for
the short-lived TCP flows and then an extended and more accurate TCP steady-state model.
We validate our models with simulations and compare them against existing models.
The results show that our model for the short-lived flows yields more accurate performance
predictions (up to $20\%$)
than the ones developed in \cite{csa} and \cite{skv}. In addition, our extended
steady-state model is up to $75\%$ more accurate than the model proposed in \cite{pftk}.
\end{abstract}
\begin{keywords}
TCP, performance evaluation, stochastic model
\end{keywords}
% Note that keywords are not normally used for peerreview papers.
% For peer review papers, you can put extra information on the cover
% page as needed:
% \begin{center} \bfseries EDICS Category: 3-BBND \end{center}
%
% For peerreview papers, inserts a page break and creates the second title.
% Will be ignored for other modes.
\IEEEpeerreviewmaketitle
\section{Introduction}
A multitude of Internet applications, such as the world wide web, usenet
news, file transfer and remote login, have opted TCP as the
transport mechanism. Thus, TCP greatly influences the performance of Internet~\cite{kt,kg},
and a well-designed TCP is of utmost importance to the
level of satisfaction of Internet users. Several stochastic TCP models
have been proposed~\cite{csa}-\cite{pftk},~\cite{hot} for predicting its performance in terms of latency and throughput.
Considerable emphasis has been given into better understanding of the
dynamics of TCP and its sensitivity to
network parameters, such as the TCP round trip time and the packet loss rate.
Understanding the impact of TCP dynamics on its performance is critical for
optimizing TCP and the design of active queue management techniques~\cite{sk,to} and
TCP-friendly multicast protocols \cite{jt,vrc}. However, TCP is a very
complex protocol, and the fast-changing network conditions make
the development of an accurate TCP stochastic model to be a very
challenging task.
Stochastic models of TCP can be classified into three classes:
(1) steady-state models for predicting the performance of bulk transfer flows~\cite{bgh,pftk},
(2) models for short-lived flows assuming low loss rates~\cite{ps,hot,jm},
and (3) models that combine the two above models~\cite{skv,csa}.
To the best of our knowledge, none of the steady-state models proposed
so far account for the slow-start phase which begins at the end of every
single time-out. The work in \cite{pftk} assumes that the
slow-start phase happens less frequently than the congestion-avoidance
phase and the throughput in the slow-start phase is less than that in the
congestion-avoidance phase, and that the slow-start phase can be
ignored safely. While this could be the case with very small loss rates,
the assumption does not hold in general. Empirical
measurements show that $90\%$ of the packet losses
lead to time-outs~\cite{pftk}. Since TCP enters the slow-start
phase when a time-out occurs, accurate TCP performance
models must take into consideration of the aggregate effects of the
slow-start phases.
All steady-state models assume the availability of unlimited data to
send. Hence, the impact of the transient phase on
performance is considered insignificant, and therefore is ignored. These models
work well only for predicting the TCP send rate or the throughput of bulk data
transfers, and are not applicable to predicting the
performance of short-lived TCP flows.
It is noted in \cite{cbm}-\cite{tmsl} that the majority of TCP traffic in the
Internet consists of short-lived flows, i.e., the transmission comes to
an end during the slow-start phase before switching to the
congestion-avoidance phase. Hence, new models are needed that are
capable of predicting the performance of short-lived TCP
flows; and this is one of the main subject of our study.
In this paper, we first develop a better and tractable model for the
congestion window growth pattern in the slow-start phase.
Using this new slow-start phase model, we construct an accurate model for
the short-lived TCP flows and then an extended and more accurate TCP steady-state model.
Major improvement in both models is achieved by relaxing key assumptions and
enhancing critical approximations that have been made in existing popular models.
The remainder of the paper is organized as follows. Section
\ref{sec:model} presents an analysis in developing the improved and extended steady-state model.
Section \ref{sec:st} builds the stochastic model for short-lived flows.
In section \ref{sec:sim}, both models are validated with simulations and compared
against existing models. Finally, Section \ref{sec:con} concludes the
paper.
\section{Steady-state Model Incorporating the Slow-start Phase} \label{sec:model}
\subsection{Assumptions}
As in \cite{pftk}, we develop our models based on the BSD TCP Reno release
\cite{stevens}. We assume that the link speed is very high,
the round-trip time (RTT) remains fairly constant at all times, and the sender
sends full-sized segments whenever the congestion window ($cwnd$)
allows. The advertised window is assumed to be always a constant and large. Thus, the
congestion window evolution alone determines the send rate,
which roughly can be described as $cwnd/RTT$.
We model the dynamics of TCP in terms of ``rounds'' as done in
\cite{pftk}. A round starts when a window of packets is sent by the
sender and ends when one or more acknowledgments are received for these
packets. The effect of the delayed acknowledgment is taken into
consideration, but neither the Nagle algorithm nor the silly window
syndrome avoidance is considered. In addition, we assume that the packet
losses are in accordance with the bursty loss model. The packet losses
in different rounds are independent, but they are correlated within a
single round; that is, if one packet in a round is lost, then the following
back to back packets in the same round are also assumed to be lost. This
is an idealization of the packet loss dynamics observed in the paths
where FIFO drop-tail queues are used~\cite{csa}. Finally, we assume that the sender
has unlimited data to send.
%--------------------------------------------------------------------------------------
\subsection{Model Development}
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/newtra.eps}
\end{center}
\caption{The extended steady state model - evolution of congestion
window size when loss indications are triple-duplicate ACK's and
time-outs.}
\label{equmodel}
\end{figure}
Fig. \ref{equmodel} depicts an instance of the congestion window's
evolution over time. As shown in the figure, when a time-out occurs due to
lost packets, TCP enters into the slow-start phase to recover from a perceived
network congestion.
Let TDP be the period between two triple-duplicate ($TD$)
losses, $Z^{ss}_i$ be the time spent in the
slow-start phase, $Z^{TD}_i$ be the duration of the congestion-avoidance
phase, and $Z^{TO}_i$ be the time interval of the time-out phase. Let
$M_i$ be the number of packets sent during the total time $S_i$. Then,
we have that
\begin{eqnarray}
M_i & = & Y^{ss}_{i} + \sum^{n_i}_{j=1}Y_{ij}+R_i, \label{eq:mi}\\
S_i & = & Z_i^{ss} + Z^{TD}_i +Z_i^{TO} \nonumber\\
& = & Z_i^{ss} + \sum^{n_i}_{j=1}A_{ij} +Z_i^{TO}, \label{eq:si}
\end{eqnarray}
where $Y^{ss}_{i}$ is the number of packets sent during the slow-start
phase, $A_{ij}$ is the duration of the $j$th TDP, $n_{i}$ is the total
number of the TDPs in the interval $Z_i^{TD}$, $Y_{ij}$ is the number
of packets sent during the $j$th TDP of interval $Z^{TD}_i$, and $R_i$
is the number of packets sent during the time-out phase. $W^{ss}_i$ is
the window size at the end of a slow start and finally $W^{TD}_{ij}$ is
the window size at the end of the $j^{th}$ TDP.
Assuming ${(S_i, M_i)}$ to be a sequence of independent and identically
distributed (i.i.d.) random variables, we determine the send rate
as
\begin{displaymath}
B=\frac{E[M]}{E[S]}.
\end{displaymath}
Considering ${n_i}$ to be i.i.d. random variables and independent of
${Y_{ij}}$ and ${A_{ij}}$, we have
\begin{eqnarray}
B & = & \frac{ E[Y^{ss}] + E[\sum^{n_i}_{j=1}Y_{ij}]+E[R]}{ E[Z^{ss}] +
E[\sum^{n_i}_{j=1}A_{ij}]+E[Z^{TO}]} \nonumber \\
& = & \frac{ E[Y^{ss}] + E[n]E[Y] + E[R] }{E[Z^{ss}] + E[n]E[A] +
E[Z^{TO}]} \label{eq:bth} .
\end{eqnarray}
We next derive the closed form expressions for these
expected values in the different TCP phases: the slow-start, the
congestion-avoidance and the time-out phases.
%---------------------------------------------------------------------------
\subsubsection{The Slow-Start Phase} \label{sec:sl}
According to TCP Reno \cite{jacob,stevens}, the current state of a TCP
connection is determined based upon the values of the congestion window
size ($cwnd$) and the slow-start threshold ($ssthresh$). If $cwnd$ is
less than $ssthresh$, TCP is in the slow-start phase, otherwise, it is
in the congestion-avoidance phase.
Since TCP has no knowledge of the network conditions, during the
slow-start phases, it probes for the available bandwidth ``greedily'',
i.e., it increases the $cwnd$ by one upon the receipt of a non-repeated
acknowledgment. This algorithm can be formulated as
\begin{equation}
cwnd_i = \lceil \frac{cwnd_{i-1}}{2} \rceil + cwnd_{i-1}, \label{eq:discrete_cwnd}
\end{equation}
in which $cwnd_i$ is the congestion window size for the $i^{th}$
round. Equation (\ref{eq:discrete_cwnd}) is due to the fact that
assuming no loss, in round ($i-1$), there is a total of $cwnd_{i-1}$
packets sent to the destination, which, in turn, causes the receiver to
generate $\lceil cwnd_{i-1}/2 \rceil$ acknowledgments\footnote{$\lceil x
\rceil = $the smallest integer bigger than $x$.}. According to the
slow-start algorithm, upon receiving these ACKs, the sender increases
the $cwnd$ by the number of ACKs it has obtained, which is $\lceil
cwnd_{i-1}/2 \rceil$.
Noting that the congestion window is an integer, we can simplify
(\ref{eq:discrete_cwnd}) as follows\footnote{In deriving a
model for the latency of the short-lived TCP flows,
(\ref{eq:sim_discrete_cwnd}) was approximated in \cite{csa} as:
$cwnd_i = 3cwnd_{i-1}/2$.}:
\begin{equation}
cwnd_i = \lceil \frac{3}{2}cwnd_{i-1} \rceil
.\label{eq:sim_discrete_cwnd}
\end{equation}
Rearranging, we get
\begin{eqnarray}
\lceil \frac{cwnd_{i-1}}{2} \rceil & = & \lceil \frac{1}{2}\lceil
\frac{3}{2}cwnd_{i-2} \rceil \rceil \nonumber \\
& \approx & cwnd_{i-2} .
\end{eqnarray}
Substituting this in (\ref{eq:discrete_cwnd}), we get the following:
\begin{equation}
cwnd_i \approx cwnd_{i-2} + cwnd_{i-1} . \label{eq:t_discrete_cwnd}
\end{equation}
In order to examine the accuracy of this approximation, a
typical evolution of $cwnd$ is given as follows:
\begin{displaymath}
1, 2, 3, 5, 8, 12, 18, 27...
\end{displaymath}
Compared with the sequence generated by (\ref{eq:t_discrete_cwnd}):
\begin{displaymath}
1, 2, 3, 5, 8, 13, 21, 34...
\end{displaymath}
and the evolution of $cwnd$ proposed by the model in \cite{csa}:
\begin{displaymath}
1, 1.5^1, 1.5^2, 1.5^3, 1.5^4, 1.5^5, 1.5^6, 1.5^7...
\end{displaymath}
or calculated as
\begin{displaymath}
1, 1.5, 2.25, 3.38, 5.06, 7.59, 11.39, 17.09 ...
\end{displaymath}
The similarity between the two previous sequences and the discrepancy
between the real evolution of $cwnd$ with the proposed model in
\cite{csa} show that (\ref{eq:t_discrete_cwnd}) gives a better
approximation of the slow-start phase.
Noting that (\ref{eq:t_discrete_cwnd}) generates the Fibonacci sequence,
we can therefore express $cwnd$ as follows:
\begin{equation}
cwnd_n = C_1X^n_1 + C_2X^n_2 \label{eq:c_cwnd}, \textrm{ n = }1, 2, 3 ...
\end{equation}
where\footnote{$X_1$ is also called the golden number which will be
denoted as $g$ in the later parts of this paper.}
\begin{equation}
X_{1,2} = \frac{ 1 \pm \sqrt{5}}{2}.
\end{equation}
$C_1$ and $C_2$ are determined by the initial value of $cwnd$. Assuming
the initial value of $cwnd$ is $1$, we get
\begin{equation}
C_{1,2} = \frac{ 5 \pm \sqrt{5}}{10}.
\end{equation}
By knowing the evolution of the congestion window, we can calculate the
total number of packets, $Y_n^{ss}$, that are sent until the $n_{th}$
round, by summing the congestion window size during each round:
\begin{eqnarray}
Y_n^{ss} & = & \sum_{i=1}^{n}{cwnd_i} \nonumber \\
& = & C_1X^{n+2}_1 + C_2X^{n+2}_2 - 2 \nonumber \\
& \approx & C_1X^{n+2}_1 - 2 .\label{eq:a_datan}
\end{eqnarray}
The last approximation is due to the fact that:
\begin{equation}
C_2X^{n+2}_2 \le | \frac{5-\sqrt{5}}{10}\times (\frac{1-\sqrt{5}}{2})^3
| = 0.065 \nonumber
\end{equation}
Thus, from Equation (\ref{eq:a_datan}), the number of rounds, $n$, can
be computed as:
\begin{equation}
n = log_{g}(\frac{Y_n^{ss}+2}{C_1}) - 2 .\label{eq:nequation}
\end{equation}
Substituting (\ref{eq:nequation}) into (\ref{eq:c_cwnd}), we can get the
approximate relationship between the congestion window size and the
total number of packets that have been sent, as follows:
\begin{equation}
cwnd_n = \frac{Y_n^{ss} + 2}{g^2} . \label{eq:c_d_r}
\end{equation}
Taking the expectation of both sides of Equation (\ref{eq:c_d_r}), we have:
\begin{equation}
E[W^{ss}] = \frac{E[Y^{ss}]+2}{g^2} \label{eq:twss}
\end{equation}
in which $E[W^{ss}]$ is the expectation of $cwnd_n$.
If the slow-start phase is ended by a packet loss, the expected data
that have been sent during this phase can be calculated as
\begin{equation}
E[Y^{ss}] = \frac{1-p}{p},
\end{equation}
where $p$ is the loss rate.
Substituting the value of $E[Y^{ss}]$ in (\ref{eq:twss}), we get:
\begin{equation}
E[W^{ss}]^* = \frac{1+p}{pg^2} . \label{eq:fwssy}
\end{equation}
This is the expected value of the congestion window when the slow-start
phase ends due to a lost packet. Observing that when $p$ is small, the
expected value would be much bigger than the expected value of
$ssthresh$, i.e.,
\begin{equation}
E[W^{ss}]^* \gg E[ssthresh]=\frac{E[W^{TD}]}{2},
\end{equation}
where the last equality comes from the fact that after each time-out,
the slow-start threshold is set to half of the current congestion window
$W^{TD}$.
Thus, it is safe to assume that TCP enters the congestion-avoidance
phase before a packet gets lost. That is, we assume that TCP always
switches from the slow-start to congestion-avoidance phase when the
congestion window reaches the value of $ssthresh$. We show the proof of this
in the next section after obtaining the closed-form solution of
$E[W^{TD}]$.
As a consequence, we have that the expected congestion window size at the end
of the slow start be constrained by the limitation of the slow-start threshold:
\begin{equation}
E[W^{ss}] = E[ssthresh] = \frac{E[W^{TD}]}{2} .\label{eq:fwss}
\end{equation}
Using (\ref{eq:fwss}) in (\ref{eq:twss}) and rearranging, we obtain the
expected number of packets sent during the slow-start phase:
\begin{equation}
E[Y^{ss}] = \frac{E[W^{TD}]g^2}{2} - 2. \label{eq:fyss}
\end{equation}
The time spent in the slow-start phase is
obtained by multiplying the number of rounds described in (\ref{eq:nequation}) with RTT:
\begin{equation}
E[Z^{ss}] = log_{g}\bigg(\frac{E[W^{TD}]}{2C}\bigg)\cdot RTT \label{eq:fzss}.
\end{equation}
%--------------------------------------------------------------------------------
\subsubsection{The Congestion-Avoidance Phase}
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/dong}
\end{center}
\caption{Packets sent during a TDP. Adopted from \cite{pftk}.}
\label{fig:dong}
\end{figure}
Let $Y_i$ be the number of packets sent during the $i$th TDP, $A_i$ be
the duration, and $W_i^{TD}$ be the window size at the end of the TDP.
With reference to Fig. \ref{fig:dong}, we obtain the following relations
\cite{pftk}\footnote{For details see \cite{pftk}. Note that
(\ref{eq:wi}) captures more accurately the window size at the end of
the TDP than the one presented in \cite{pftk}.
}:
\begin{eqnarray}
Y_i & = & \alpha_i + W^{TD}_i - 1 ,\label{eq:yi} \\
A_i & = & \sum^{X_i+1}_{j=1}r_{ij} ,\label{eq:ai} \\
W^{TD}_i & = & \frac{W^{TD}_{i-1}}{2} + \frac{X_i}{b} - 1, \label{eq:wi}
\end{eqnarray}
and
\begin{eqnarray}
Y_i & = & \frac{X_i}{2}\bigg(\frac{W^{TD}_{i-1}}{2}+W^{TD}_i-1\bigg)+\beta_i \label{eq:fin} ,
\end{eqnarray}
where $X_i$ is the penultimate round in the TDP which experiences packet
losses, $r_{ij}$ is the round trip time, $\alpha_i$ is the number of
packets sent in a TDP until the first loss happens, $b$ is the number of
packets acknowledged by a received ACK, and $\beta_i$ is the
number of packets sent in the fast retransmit phase, which is the last
round \cite{pftk}.
Based on our assumptions, $\alpha_i$ is obviously geometrically distributed. Hence,
\begin{equation}
P[\alpha_i=k] = (1-p)^{k-1}p, \qquad k=1, 2, \ldots \label{eq:al}
\end{equation}
and therefore, we have that
\begin{eqnarray}
E[Y] & = & \frac{1-p}{p} + E[W^{TD}] . \label{eq:eyi}
\end{eqnarray}
In addition, based on (\ref{eq:wi}) and (\ref{eq:fin}), we also have that
\begin{small}
\begin{eqnarray}
E[X] & = & b\bigg(\frac{E[W^{TD}]}{2} + 1\bigg) \label{eq:ewi}\\
E[Y] & = &
\frac{E[X]}{2}\bigg(\frac{E[W^{TD}]}{2}+E[W^{TD}]-1\bigg)+E[\beta]
,\label{eq:efin}
\end{eqnarray}
\end{small}
where we assume ${X_i}$ and ${W^{TD}_i}$ are mutually
independent. Combining (\ref{eq:eyi}), (\ref{eq:ewi}), and
(\ref{eq:efin}), we get
\begin{footnotesize}
\begin{eqnarray}
\frac{1-p}{p} + E[W^{TD}] & = & \frac{E[X]}{2}\bigg(\frac{E[W^{TD}]}{2}+E[W^{TD}]-1\bigg)+E[\beta] \nonumber\\
& = & \frac{b(\frac{E[W^{TD}]}{2} + 1)}{2} \times \nonumber\\
& & \bigg(\frac{E[W^{TD}]}{2}+E[W^{TD}]-1\bigg)+E[\beta] . \label{eq:final}
\end{eqnarray}
\end{footnotesize}
Since $\beta_i$ is the number of packets sent when $k$ packets in the
penultimate round are ACKed, its value equals to $k$ with probability
\begin{equation} \label{eq:awk}
A(w, k) = \frac{(1-p)^kp}{1-(1-p)^w} .
\end{equation}
Therefore,
\begin{small}
\begin{eqnarray}
E[\beta] & = & E\big[\sum_{k=0}^{w-1}k \cdot P(\beta = k)|w\big] \nonumber\\
& = & E\big[\sum_{k=0}^{w-1}\frac{k(1-p)^kp}{1-(1-p)^w}|w\big] \nonumber\\
& = &
E\bigg[\frac{(1-p)(1-pw(1-p)^{w-1}-(1-p)^w)}{p(1-(1-p)^w)}|w\bigg]
\nonumber \\
& \approx & (E[W^{TD}]-1)(1-p) \label{eq:smfb}
\end{eqnarray}
\end{small}
for p small. Using (\ref{eq:smfb}) in (\ref{eq:final}) and rearranging, we get:
\begin{footnotesize}
\begin{equation} \label{eq:eow}
E[W^{TD}] = -\frac{2(b-2p)}{3}+\sqrt{\frac{4(bp+2(1-p^2))}{3bp} +
{\big(\frac{2b-4p}{3b}\big)}^2}.
\end{equation}
\end{footnotesize}
Inserting (\ref{eq:eow}) in (\ref{eq:ewi}), we obtain
\begin{small}
\begin{equation}
E[X] = \frac{(2p+3)b-b^2}{3}+\sqrt{\frac{b^2p+2b(1-p^2)}{3p} + (\frac{b-2p}{3})^2} ,
\end{equation}
\end{small}
and
\begin{eqnarray}
E[A] & = & (E[X] + 1)E[r] \nonumber\\
& = & RTT \Bigg( -\frac{b^2-(2p+6)b}{3} \nonumber \\
& & + \sqrt{\frac{b^2p+2b(1-p^2)}{3p} + (\frac{b-2p}{3})^2} \Bigg) , \label{eq:fea}
\end{eqnarray}
where we assume $r_{ij}$'s to be i.i.d. and $E[r] \approx RTT$.
In the previous subsection, we stated without proof that the slow-start
phase will enter the congestion-avoidance phase before a packet loss
happens. This can be proved if $E[W^{ss}]^*$, the expected congestion
window size at the end of the slow-start phase due to a packet loss, is
bigger than the value of $E[ssthresh] = E[W^{TD}]/2$, which is the
expected threshold at the beginning of the slow-start phase. In other
words, we need to show that:
\begin{equation}
\frac{1+p}{pg^2} \ge \frac{E[W^{TD}]}{2}, \label{eq:inequality}
\end{equation}
where $E[W^{TD}]$ is given by (\ref{eq:eow}). This is easily shown
below, under the (normal) condition that $p$ is small:
\begin{eqnarray}
\frac{1+p}{pg^2} & \ge & \sqrt{\frac{2}{3bp}} \nonumber \\
\Leftrightarrow 1 - 0.3p + p^2 & \ge & 0 \nonumber
\end{eqnarray}
The last inequality stands obviously. In fact,
(\ref{eq:inequality}) is valid $\forall p\in[0,1]$.
%--------------------------------------------------------------------------------------------
\subsubsection{The Time-out Phases}
The probability that a loss indication is a time-out under the current
congestion window size $w$, is given in \cite{pftk} as:
\begin{equation}
min \big ( 1, \frac{(1-(1-p)^3)(1+(1-p)^3(1-(1-p)^{w-3}))}{1-(1-p)^w} \big ) \\
\end{equation}
which gets simplified when the loss rate, $p$, is small:
\begin{equation}
Q^{TD}(w)= min(1, \frac{3}{w}). \nonumber
\end{equation}
Thus, $Q^{TD}$, the expected probability that a loss leads to a time-out
at the end of the congestion-avoidance phase, is approximated in
\cite{pftk} as follows:
\begin{eqnarray}
Q^{TD} & = & E[Q^{TD}(w)] \nonumber \\
& = & min(1, \frac{3}{E[W^{TD}]}) . \label{eq:pfqtd}
\end{eqnarray}
The traffic traces collected in \cite{pftk} indicate that the effect of
the time-outs must always be captured by any TCP performance prediction
model. In most of the traces, time-out events out-numbered the fast
retransmit events, i.e., $Q^{TD}$ is around $90\%$ of the total loss.
This value is larger than the value given by the formula of
(\ref{eq:pfqtd}), as we further calculated that the $E[W^{TD}]$ is
greater than $10$, which, in turn, renders the $Q^{TD}$ to be less than
$30\%$. So, we believe that this approximation underestimates the real
$Q^{TD}$. As a matter of fact, the underestimation of $Q^{TD}$ in
\cite{pftk} is due to the approximation of $E[1/W] \approx 1/E[W]$ by
noting that:
\begin{displaymath}
\begin{array}{cccc}
& {E[(\frac{1}{\sqrt{W}})(\sqrt{W})]}^2 & \le & E[({\frac{1}{\sqrt{W}}})^2]E[({\sqrt{W}})^2] \\
\Longrightarrow & \frac{1}{E[W]} & \le & E[\frac{1}{W}]
\end{array}
\end{displaymath}
The equality holds only when $W$ is a constant.
Now, using Taylor's formula and expectation properties, we obtain the
following\footnote{See Appendix~\ref{ap:ew} for the derivation.}
\begin{equation}
E[\frac{1}{W}] \approx \frac{1}{E[W]}(1+\frac{Var(W)}{E[W]^2}). \label{eq:lastw}
\end{equation}
Hence, to find a more accurate approximation of $Q^{TD}$, we must find the variance of $W$.
After a rigorous analysis\footnote{See Appendix~\ref{ap: va} for
details.}, we obtain the variance of $W^{TD}$, the congestion window
size at the end of TDP, to be:
\begin{equation}
Var[W^{TD}]_{p \to 0} \approx \frac{8(\sqrt{3}-1)}{3bp}. \label{eq:rvw}
\end{equation}
Substituting (\ref{eq:eow}) and (\ref{eq:rvw}) into (\ref{eq:lastw}), we get:
\begin{eqnarray}
E[\frac{1}{W^{TD}}] & = & \frac{1}{E[W^{TD}]}(1+\frac{Var(W^{TD})}{E[W^{TD}]^2}) \nonumber\\
& = & \frac{1}{E[W^{TD}]}(1+\frac{\frac{8(\sqrt{3}-1)}{3bp}}{\frac{8}{3bp}}) \nonumber\\
& = & \frac{\sqrt{3}}{E[W^{TD}]} .\label{eq:cwe}
\end{eqnarray}
Equation (\ref{eq:cwe}) gives a better, but still simple,
estimation of $E[1/W^{TD}]$. Then, $Q^{TD}$, the probability that a loss
detection is a time-out (TO), can be found to be:
\begin{eqnarray}
Q^{TD} & \approx & min(1, \frac{3\sqrt{3}}{E[W^{TD}]}) .\label{eq:fqtd}
\end{eqnarray}
The probability of $n_i$, the number of TDPs, is derived according to $Q^{TD}$:
\begin{displaymath}
p(n_i = k) = (1-Q^{TD})^{(k-1)}\cdot Q^{TD} .
\end{displaymath}
This is due to the fact that, with probability $Q^{TD}$, the packets
lost at the end of the congestion control phase lead to a TO, and,
with probability $1-Q^{TD}$ the TCP connection stays in TDP.
By taking the expectation of $n_i$, we get:
\begin{eqnarray}
E[n] & = & \frac{1}{Q^{TD}} .\label{eq:fn}
\end{eqnarray}
The expressions for the number of packets sent in the time-out phase,
$E[R]$ and its duration, $E[Z^{TO}]$ are given in \cite{pftk} as:
\begin{eqnarray}
E[R] & = & \frac{1}{1-p} \label{eq:fr}\\
E[Z^{TO}] & = & T_0\frac{f(p)}{1-p} , \label{eq:fzto}
\end{eqnarray}
where $f(p)$ is defined as:
\begin{equation}
f(p) = 1+p+2p^2+4p^3+8p^4+15p^5+32p^6 .\label{eq:fp}
\end{equation}
%-----------------------------------------------------------------------------------------------
\subsubsection{The Steady State Send Rate and Throughput}
Substituting Equations (\ref{eq:fyss}), (\ref{eq:fzss}), (\ref{eq:eyi}),
(\ref{eq:eow}), (\ref{eq:fea}), (\ref{eq:fqtd}), (\ref{eq:fn}),
(\ref{eq:fr}) and (\ref{eq:fzto}) into (\ref{eq:bth}), and taking into
consideration the limitation of the window size~\cite{pftk}, we finally derive the send rate as:
\begin{equation} \label{eq:fsr}
\begin{small}
B = \left\{ \begin{array}{l}
\frac{\frac{E[W^{TD}]g^2}{2} - 2
+\frac{1}{Q^{TD}(E[W^{TD}])}(\frac{1-p}{p}+E[W^{TD}])+\frac{1}{1-p}}{\big
(
log_g(\frac{E[W^{TD}]}{2C_1})+\frac{1}{Q^{TD}(E[W^{TD}])}(\frac{bE[W^{TD}]}{2}+b+1)
\big ) RTT+\frac{f(p)T_0}{1-p}} \\
\hspace{45 mm} \textrm{when } E[W^{TD}] < W_{m} \\
\frac{\frac{W_mg^2}{2} - 2
+\frac{1}{Q^{TD}(W_m)}(\frac{1-p}{p}+W_m)+\frac{1}{1-p}}{log_g(\frac{W_m}{2C_1})RTT+\frac{1}{Q^{TD}(W_m)}((\frac{b}{8}W_m+\frac{1-p}{pW_m}+2)+1)RTT+\frac{f(p)T_0}{1-p}}
\\
\hspace{45 mm} \textrm{ when } E[W^{TD}] \ge W_{m} .
\end{array} \right.
\end{small}
\end{equation}
This can be further simplified as
\begin{equation}
\begin{array}{l}
min(\frac{W_m}{RTT}, \\
\frac{1}{RTT\sqrt{\frac{2bp}{3}}+min\bigg( 1, 9\sqrt{\frac{bp}{8}}
\bigg)p(\frac{RTT}{2} log_g(\frac{2}{3bpC_1^2})+T_0(1+32p^2))})
.\label{eq:fssr}
\end{array}
\end{equation}
To derive the throughput, we only need to change $E[Y]$, the expected
size of packets that have been sent in a TDP, to $E[Y']$, the expected
size of packets that have been received in a TDP. $E[Y']$ can be
expressed as:
\begin{equation}
E[Y']= E[\alpha]+E[\beta]-1 ,
\end{equation}
where $E[\alpha]$ is $1/p$ and $E[\beta]$ is given by Equation (\ref{eq:smfb}).
Also we substitute $E[R]$ with $E[R']$, the expected number of packets
received in the time out phase, where \cite{pftk}
\begin{equation}
E[R'] = 1 \nonumber
\end{equation}
Thus, the throughput can be formulated as:
\begin{equation}
H = \frac{ E[Y^{ss}] + E[n]E[Y'] + E[R'] }{E[Z^{ss}] + E[n]E[A] +
E[Z^{TO}]} \label{eq:trou}
\end{equation}
or
\begin{equation} \label{eq:troud}
\begin{small}
H = \left\{ \begin{array}{l}
\frac{\frac{E[W^{TD}]g^2}{2} - 2
+\frac{1}{Q^{TD}(E[W^{TD}])}(\frac{1-p}{p}+(E[W^{TD}]-1)(1-p))+1}{\big
(
log_g(\frac{E[W^{TD}]}{2C_1})+\frac{1}{Q^{TD}(E[W^{TD}])}(\frac{bE[W^{TD}]}{2}+b+1)
\big ) RTT+\frac{f(p)T_0}{1-p}} \\
\hspace{45 mm} \textrm{when } E[W^{TD}] < W_{m} \\
\frac{\frac{W_mg^2}{2} - 2
+\frac{1}{Q^{TD}(W_m)}(\frac{1-p}{p}+(W_m-1)(1-p))+1}{log_g(\frac{W_m}{2C_1})RTT+\frac{1}{Q^{TD}(W_m)}((\frac{b}{8}W_m+\frac{1-p}{pW_m}+1)+1)RTT+\frac{f(p)T_0}{1-p}}
\\
\hspace{45 mm} \textrm{ when } E[W^{TD}] \ge W_{m}
\end{array} \right.
\end{small}
\end{equation}
which, when p is small, can be simplified as (\ref{eq:fssr}). This can
be explained by noting that, if a loss seldom happens, then the send
rate should just equal to the throughput.
Fig. \ref{fig:diff} compares our model against the one proposed in \cite{pftk}.
It shows the predicted throughput difference versus $p$ for the case of $RTT = 200\ ms$,
$MSS = 536$ bytes, $w_1 = 1$ segment, $T_0 = 1$ sec, $W_m = 20$ segments, and $b = 2$.
With both models, when $p \to 0$, then $H \to W_m/RTT$. However, for $10^{-3} < p < 10^{-1}$,
the model in \cite{pftk} overestimates the throughput by up to a factor of 2.5 (at $p \approx 10^{-2}$).
Obviously, when $p \to 1$, again both models obtain the same performance values.
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/diff_m.eps}
\end{center}
\caption{Our proposed model is compared with the one developed in \cite{pftk} in terms
of the predicted throughput difference versus the loss rate ($p$) for the case of: $RTT =
200ms$, $MSS = 536bytes$, $w_1 =1segment$, $T_0=1sec$, $W_m =
20segments$, $b =2$. }
\label{fig:diff}
\end{figure}
%===============================================================================================
\section{Stochastic Model for Short-lived Flows} \label{sec:st}
Our proposed model for the short-lived TCP flows is partially
based on our results given in Section~\ref{sec:sl}. In addition, it is
composed of four parts according to a typical short-lived flow evolution:
the start of the connection (three-way-handshake), the initial slow-start phase,
the first loss, and the subsequent losses. We first derive the latency
a flow experience in each part, and then sum them to obtain the total
latency.
%--------------------------------------------------------------------------------
\subsection{The Connection Start-up Phase}
Every TCP connection starts with the three-way-handshake
process. Assuming that no ACK packets can get lost, this process can be
well modeled as follows \cite{csa}:
\begin{equation}
E[T_{twhs}] = RTT + T_s(\frac{1-p}{1-2p} - 1 ) \label{eq:three}
\end{equation}
where $T_s$ is the duration of SYN time-out and $p$ is the packet loss rate.
We further assume that two or more time-outs within the three-way-handshake process
is very rare. Otherwise, the slow-start threshold would get set to one, and
therefore, the connection would get forced directly into the congestion-avoidance phase
instead of into the slow-start phase.
%------------------------------------------------------------------------------
\subsection{The Initial Slow-start Phase}
After the three-way-handshake, the slow-start phase begins.
In this phase, the sender's congestion window ($cwnd$) increases
exponentially until either of the following two events occur: a packet
gets lost or the $cwnd$ reaches its maximum value $W_m$.
In order to derive the latency for this phase,
$E[Y_{init}]$, the expected number of packets sent until a loss occurs
is given by the following enhanced equation (based on the one given in
\cite{csa}):
\begin{equation}
E[Y_{init}] = \frac{(1-(1-p)^d)(1-p)}{p} \label{eq:finit}
\end{equation}
where $d$ is the total file size measured in packets that must be
transmitted.
Substituting (\ref{eq:finit}) in (\ref{eq:twss}), we obtain the expected congestion
window size at the end of the slow-start phase due to packet losses
as:
\begin{equation}
E[W_{init}]=\frac{(1-(1-p)^d)(1-p)+2p}{pg^2}
\end{equation}
If $E[W_{init}] > W_m$, then the congestion
window first grows to $W_m$ and then remains there while sending the
rest of the packets. Thus, the whole procedure is divided into two parts
\cite{csa}. From (\ref{eq:twss}), the number of packets sent
when the $cwnd$ grows to $W_m$ is given by:
\begin{equation}
data_1 = g^2\cdot W_m - 2. \label{eq:d1}
\end{equation}
Substituting (\ref{eq:d1}) into (\ref{eq:nequation}), we can obtain the
duration of this step measured in rounds:
\begin{equation}
n_1 = log_g(\frac{W_m}{C_1}) .
\end{equation}
In the second part,
\begin{equation}
n_2 = \frac{E[Y_{init}]-data_1}{W_m}
\end{equation}
rounds are needed to transmit the remaining $E[Y_{init}]-data_1$ packets.
Combining the previous results together and using (\ref{eq:nequation})
for the $E[W_{init}] \le W_m$ case, the expected slow-start latency is
computed as follows:
\begin{equation} \label{eq:senequation}
E[n] = \left\{ \begin{array}{l}
[ \lceil log_{g}(\frac{W_{m}}{C_1}) \rceil+
\frac{1}{W_{m}}(E[Y_{init}]-g^2W_m -2 )] \\
\hspace{38 mm} \textrm{when } E[W_{init}] > W_{m} \\
\lceil log_{g}(\frac{E[Y_{init}]+2}{C_1}) \rceil - 2 \hspace{5mm}
\textrm{when } E[W_{init}] \le W_{m}
\end{array} \right.
\end{equation}
%-------------------------------------------------------------------------------
\subsection{The First Loss}
The initial slow-start phase ends when a packet loss is detected with a
probability of $1-(1-p)^d$. When a packet gets lost, it could cause
retransmission time-out (RTO) or lead to a triple duplicate ACKs, in
which case TCP could recover in a round or two by using the fast
retransmit and the fast recovery mechanism. We first derive the probability
that a packet loss leads to a time-out (TO).
Due to the exponential growing pattern of $cwnd$ in the slow-start
phase, $Q^{ss}$, the probability that a packet loss leads to a TO is different from
the probability that when the sender is in the congestion-avoidance
phase. With reference to Fig. \ref{fig:qss}, we derive the expression of $Q^{ss}$ as follows.
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/qss.eps}
\end{center}
\caption{An illustration of a triple-duplicate (TD) event.}
\label{fig:qss}
\end{figure}
In the round with a TD event, let $W^{ss}$ be the
current size of $cwnd$, which has a value $w$. In this round, $w$
packets were sent. Among them, $k$ packets are assumed to be ACKed.
Since the connection is still in the slow-start phase, $cwnd$ increases
to $w+k$ and another $2k$ packets are sent in the next
round\footnote{The delayed acknowledgment concept is not applied here,
but we show later that it does not affect the analysis of the
$Q^{ss}$.}. If more than three packets from these $2k$ packets get
ACKed, then a TD would occur; otherwise, a TO would take place.
Letting
\begin{equation}
h(m) = \sum^2_{i=0}(1-p)^ip \hspace{10mm} \textrm{ if } m \ge 3 ,
\end{equation}
be the probability that no more than 2 packets have been transmitted
successfully in a round of $m$ packets,
we then obtain $Q^{ss}$ to be
\begin{equation} \label{eq:qsswssslf}
Q^{ss}(W^{ss}) = \left\{ \begin{array}{l}
1, \hspace{36mm} W^{ss} \le 2 \\
\sum^1_{k=0}A(W^{ss}, k) \\
+ \sum^{W^{ss}-1}_{k=2}A(W^{ss}, k)h(2k), \textrm{ otherwise},
\end{array} \right.
\end{equation}
where $A(w, k)$ is as given by (\ref{eq:awk}) and gives
the probability that the first $k$ packets have been successfully
transmitted and ACKed in a round of $w$ packets, provided that there
might be one or more packets got lost. Simplifying (\ref{eq:qsswssslf}),
we get $Q^{ss}(W^{ss})$ to be equal to
\begin{small}
\begin{equation} \label{eq:mqss}
min\bigg( 1,
\frac{p(2-p)+(1-(1-p)^3)(1-p)^2(1-(1-p)^{W^{ss}-2})}{1-(1-p)^{W^{ss}}}
\bigg).
\end{equation}
\end{small}
As $p$ approaches zero, (\ref{eq:mqss}) reduces to
\begin{equation} \label{eq:msqss}
Q^{ss} = \lim_{p \rightarrow 0}E[Q^{ss}(W^{ss})] = min \bigg( 1,
\frac{2}{E[W^{ss}]} \bigg) .
\end{equation}
In case of delayed acknowledgment, $k$ successfully received packets
generate $ \lfloor k/2 \rfloor$\footnote{ $ \lfloor k/2 \rfloor$ is the
biggest integer small than $k/2$.} ACKs, and thus the size of the
$cwnd$ increases to $\lfloor k/2 \rfloor + w $ and $\lfloor k/2 \rfloor
+ k$ packets are sent. Therefore $Q^{ss}$ can be computed as:
\begin{displaymath}
Q^{ss} = \left\{ \begin{array}{l}
1, \hspace{30mm} W^{ss} \le 2 \\
\sum^1_{k=0}A(W^{ss}, k) \\
+ \sum^{W^{ss}-1}_{k=2}A(W^{ss}, k)h(\lfloor \frac{k}{2} \rfloor +
k), \textrm{ otherwise}
\end{array} \right.
\end{displaymath}
which is same as (\ref{eq:mqss}) since
\begin{displaymath}
h(2k) = h(\lfloor \frac{k}{2} \rfloor + k ) \hspace{10mm} \textrm{ for } k \ge 2 .
\end{displaymath}
The expected time that TCP spends in the RTOs is given by (\ref{eq:fzto}).
The time that TCP spends in the fast retransmit phase, $n_t$,
depends on where the loss would happen~\cite{skv}:
\begin{equation}
n_t = \left\{ \begin{array}{l}
2RTT, \hspace{7mm}\textrm{ if the lost packet is in the last} \\
\hspace{20mm} \textrm{three packets of the window} \\
RTT, \hspace{35mm} \textrm{otherwise}
\end{array} \right.
\end{equation}
Thus, when the congestion window size $W^{ss}$ is bigger than three, the
expected time, $E[n_t]$ can be found to be:
\begin{eqnarray}
E[n_t] & = & \frac{1-(1-p)^{W^{ss}-3}}{1-(1-p)^{W^{ss}}}\times 2RTT \nonumber \\
& & + \frac{(1-p)^{W^{ss}-3}(1-(1-p)^3)}{1-(1-p)^{W^{ss}}}\times RTT \nonumber \\
& = & RTT \cdot \frac{2-(1-p)^{W^{ss}-3} - (1-p)^{W^{ss}}}{1-(1-p)^{W^{ss}}}. \label{eq:nt}
\end{eqnarray}
Finally, the expected latency that this loss would incur is:
\begin{equation}
T_{loss} = (1-(1-p)^d)(Q^{ss}E[Z^{TO}] + (1-Q^{ss})E[n_t]) ,
\end{equation}
where $W^{ss}$ is
\begin{equation}
W^{ss} = min \bigg( W_m, \frac{E[Y_{init}]+2}{g^2} \bigg) . \label{eq:winit}
\end{equation}
\subsection{Sending the Rest of the Packets}
After the first packet loss, the transmission latency of the rest
($d-E[Y_{init}]$) packets is obtained by using our extended steady-state
model as follows:
\begin{eqnarray}
T_{rest} & = & \frac{d-E[Y_{init}]}{H} \nonumber \\
& = & \frac{dp - (1-(1-p)^d)(1-p)}{p \cdot H} , \label{eq:trest}
\end{eqnarray}
where $H$ is as given by (\ref{eq:trou}).
%------------------------------------------------------------------------------------------------------
\subsection{Total Latency}
Grouping (\ref{eq:three}), (\ref{eq:senequation}), (\ref{eq:nt}) and
(\ref{eq:trest}) together and considering the delay ($T_{delay}$) caused
by the delayed acknowledgment for the first packet (whose mean value is
100ms for the BSD-derived implementations), we now have the total
expected latency:
\begin{equation} \label{eq:latency_z}
T_{latency} = E[T_{twhs}] + E[n]RTT + T_{loss} + T_{rest} + T_{delay} - \frac{RTT}{2} .
\end{equation}
Note that the last term is due to the fact that only half of a round is
needed to send the last window of packets.
In Fig. \ref{fig:l_all}, we compare this model for short-lived TCP connections against our
steady-state model. Clearly, as the transferred file size increases,
the short-lived TCP connection model approaches the steady state model. This is because
when a connection has a large amount of data to send, TCP spends
most of its time in the steady-state. In addition,
as the loss rate increases, the throughput predicted by the short-lived
TCP connection model approaches the one predicted by the steady-state model. This
is because as the connection loses its packets more frequently,
the transient slow-start phase ends quickly and the remaining packets
are sent in the steady-state phase.
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/l_all.eps}
\end{center}
\caption{The model for short-lived TCP connections is compared with the steady-state model in
terms of throughput versus loss rate for different file sizes. Model parameter values:
$RTT = 200ms$, $MSS = 536bytes$, $w_1 =1segment$, $T_0=1sec$, $W_m = 20segments$, $b =2$.}
\label{fig:l_all}
\end{figure}
%=======================================================================================================
\section{Model Validation through Simulation} \label{sec:sim}
We validated our proposed analytical models with
simulation experiments. We performed all experiments in \textit{ns-2}~\cite{ns}
using the FullTCP agent. The FullTCP agent is modeled based on the
4.4BSD TCP implementation and can simulate all the important features of
TCP Reno. The \textit{ns-2} simulation model used in all experiments is shown in
Fig. \ref{fig:sim_t}.
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/sim.eps}
\end{center}
\caption{The \textit{ns}-2 model that was used to validate our analytical TCP models.}
\label{fig:sim_t}
\end{figure}
Unlike in \cite{csa} where the Bernoulli loss model is used, in our
experiments packets were getting lost according to the bursty loss
model. Since \textit{ns-2} does not have built-in bursty packet loss model,
we added our own BurstyError Model, which was derived from the basic Error Model
class. This BurstyError Model drops packets with probability $p$, which
is a Bernoulli trial. After a packet is selected to be dropped with
probability $p$, all the subsequent packets in transit are also
dropped. This emulates the DropTail queues behavior under congestion
conditions.
We used FTP\footnote{FTP is a major Internet application that is used to remotely
transfer files.} as the application for sending a controlled number of
packets over a 10Mbps link. The experiments were designed such that the
minimum RTT was 200ms.
%----------------------------------------------------------------------------------------
\subsection{The Steady-state Model}
Using the same system parameter values that were used to generate Fig. \ref{fig:diff},
we performed 1000 simulation experiments for each value of $p$, where $p$
was varied from 0.005 to 0.1. The file size was set to 10MB.
Fig. \ref{fig:stst_s} compares the simulation results against the analytical
results obtained from our proposed steady-state model
(Full: Equation (\ref{eq:troud}), and Approximate: Equation (\ref{eq:fssr}))
and the one developed in \cite{pftk}.
Clearly, the results match our expectations. The predicted throughput values
at each value of $p$ obtained from our model much closer to the simulation
values.
To quantify the accuracy of our model relative to the simulation data,
we computed the average error using the following expression taken from
\cite{pftk}):
\begin{small}
\begin{equation}
\frac{\sum_{observations} |Th_{predicted}(p) -
Th_{observed}(p)|/Th_{observed}(p)}{Number\ of\ observations},\nonumber
\end{equation}
\end{small}
where $Th_{predicted}$ is the throughput predicted by the models and
$Th_{observed}$ is the throughput observed from the simulation experiments.
A smaller average error value indicates a better model
accuracy. We plotted these average errors against loss rates in
Fig.~\ref{fig:avr_e}. It shows that in most cases the average error is under
$8\%$ for our proposed full model (Equation (\ref{eq:troud})) and above $20\%$ for the one from
\cite{pftk}. Approximately, in most cases, our model is $75\%$ more accurate than the model
proposed in \cite{pftk}. This supports our claim that by including the slow-start phase into the
steady-state model more accurate predictions can be obtained.
In addition, Fig.~\ref{fig:avr_e} depicts the following:
the average error in predicted throughput from both analytical models increases
as $p$ approaches zero. Let say that $p=0$ and the initial slow-start threshold is set
to the maximum window size. Then, the initial slow-start phase is extended until the congestion
window reaches the maximum window size. Since there are no packet losses, TCP never switches
to the congestion avoidance phase, but rather continues transmitting packets at its
maximum sending rate allowed by the maximum window size.
For these cases that $p \approx 0$, our short-lived TCP flow model should be used instead of the
steady-state model.
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/sta_s.eps}
\end{center}
\caption{Predicted throughput obtained by our proposed steady-state model and the one developed
in \cite{pftk} are compared against simulation results for the case of $0.005 < p < 0.1$
$RTT = 200ms$, $MSS = 536bytes$, $w_1 =1segment$, $T_0=1sec$, $W_m = 20segments$.}
\label{fig:stst_s}
\end{figure}
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/avr_error_m.eps}
\end{center}
\caption{Our proposed steady-state model is compared with the one developed
in \cite{pftk} in terms of the \textit{average error}
for the case of $0.005 < p < 0.1$
$RTT = 200ms$, $MSS = 536bytes$, $w_1 =1segment$, $T_0=1sec$, $W_m = 20segments$.}
\label{fig:avr_e}
\end{figure}
%----------------------------------------------------------------------------------------
\subsection{Short-lived Flows Model: Latency versus Transferred File Size}
Figure~\ref{fig:slowstart_p0} shows the relationship between the latency
and the transferred file size under no loss conditions. It compares the
latency predictions given by our proposed short-lived TCP model
(Equation (\ref{eq:latency_z})) and the ones obtained by the
short-lived TCP models developed in \cite{csa} and \cite{skv} against the
simulation results. Obviously, our model's prediction values match the
simulated values better that the values obtained by the other models.
Our model resulted in $5.83\%$ average error, compared to $9.40\%$ and $14.53\%$ obtained by
the models in \cite{csa} and \cite{skv}, respectively.
Analyzing the results, we also observed that all prediction errors resulted from
our model are within $[-RTT/2, RTT/2]$. We believe that this is because our model accounts
for the delayed acknowledgment mechanism. Thus, for the cases where $RTT$ is small, the
prediction errors are insignificant. This is not valid for the other models propose by
\cite{csa} and \cite{skv}.
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/slowstart_p0.eps}
\end{center}
\caption{Predicted latency versus transferred file size obtained by our short-lived
TCP connection model and the ones developed in \cite{csa} and \cite{skv} are
compared against simulation results for the case of $p=0$, $RTT = 100ms$,
$MSS = 536bytes$, $w_1 =1segment$, $T_0=1sec$, $W_m = 20segments$.}
\label{fig:slowstart_p0}
\end{figure}
%-------------------------------------------------------------------------------------
\subsection{Short-lived Flows Model: Throughput versus Loss Rate and File Size}
Figures~\ref{fig:fin_2k}, \ref{fig:fin_6k} and \ref{fig:fin_11k} compares
the throughput versus loss rate predictions given by our proposed short-lived TCP model
and the one obtained by the short-lived TCP models developed in \cite{csa} against the
simulation results for the cases of 2KB, 6KB, and 11KB file sizes.
Table \ref{tb:flt_e} compares the two models in terms of the average error.
As can be observed, when the transferred file size is small and the loss
rate is low, our model yields more accurate predictions than the model
from \cite{csa}. Again, this is because our model accounts for the
delay acknowledgment mechanism and uses $g$ (golden number) instead of $\gamma$ (see
\cite{csa}). However, for large file sizes and loss rates,
both models yield similar predictions and in agreement with our steady-state model, as
expected.
\begin{table}
\caption{Our short-lived TCP connection model is compared against the one proposed in
\cite{csa} in terms of the average error.}
\begin{center}
\begin{tabular}{ c | c | c | c | c }
\hline
Loss Rate & $p=0$ & \multicolumn{3}{c}{ $3 \times 10^{-3} \sim 10^{-1} $ } \\ \cline{3-5}
File Size & 0.5 $\sim$ 26KB & 2KB & 6KB & 11KB \\
\hline
\hline
[CSA00] & 9.40\% & 4.08\% & 6.43\% & 8.38\% \\
\hline
Proposed & 5.83\% & 0.59\% & 7.54\% & 7.64\% \\
\hline
\end{tabular}
\end{center}
\label{tb:flt_e}
\end{table}
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/fin_2k.eps}
\end{center}
\caption{Predicted throughput versus loss rate obtained by our short-lived
TCP connection model and the one developed in \cite{csa} are compared against
simulation results for the case of a 2KB-file-size and $RTT = 100ms$,
$MSS = 536bytes$, $w_1 =1segment$, $T_0=1sec$,$W_m = 20segments$.}
\label{fig:fin_2k}
\end{figure}
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/fin_6k.eps}
\end{center}
\caption{Predicted throughput versus loss rate obtained by our short-lived
TCP connection model and the one developed in \cite{csa} are compared against
simulation results for the case of a 6KB-file-size and $RTT = 100ms$,
$MSS = 536bytes$, $w_1 =1segment$, $T_0=1sec$,$W_m = 20segments$.}
\label{fig:fin_6k}
\end{figure}
\begin{figure}
\begin{center}
\includegraphics[width=0.5\textwidth]{figures/fin_11k.eps}
\end{center}
\caption{Predicted throughput versus loss rate obtained by our short-lived
TCP connection model and the one developed in \cite{csa} are compared against
simulation results for the case of a 11KB-file-size and $RTT = 100ms$,
$MSS = 536bytes$, $w_1 =1segment$, $T_0=1sec$,$W_m = 20segments$.}
\label{fig:fin_11k}
\end{figure}
%=======================================================================================
\section{Conclusion} \label{sec:con}
In this paper, we first developed a better and tractable model for the
congestion window growth pattern in the slow-start phase.
Using this new slow-start phase model, we constructed an accurate model for
the short-lived TCP flows and then an extended and more accurate TCP steady-state model.
Major improvement in both models was achieved by relaxing key assumptions and
enhancing critical approximations that have been made in existing popular models.
We validated our models with simulations and compared them against the models
developed in \cite{csa,skv,pftk}. The results support our claim that our models
yield more accurate predictions.
\nocite{*}
\bibliographystyle{IEEE}
\bibliography{IEEEabrv,short_dong}
\appendices
\section{The expectation of 1/w} \label{ap:ew}
From Taylor formula, we know:
\begin{equation} \label{eq:taylor}
f(W) = \sum_{i=0}^\infty \frac{f^i(a)}{i!}(W-a)^i
\end{equation}
Let $f(W)$ and $a$ be $1/W$ and $E[W]$ respectively. We thus have:
\begin{equation} \label{eq:fnw}
f^n(W) = (-1)^nn!W^{-(n+1)}
\end{equation}
Substituting $f^i(a)$ in (\ref{eq:taylor}) and making use of (\ref{eq:fnw}) and $E[W]$, we get:
\begin{eqnarray}
\frac{1}{W} & = & \sum_{i=0}^\infty\frac{(-1)^ii!E[W]^{-(i+1)}}{i!}(W-E[W])^i \nonumber\\
& = & \sum_{i=0}^\infty\frac{(-1)^i(W-E[W])^i}{E[W]^{(i+1)}} \label{eq:ovx}
\end{eqnarray}
Taking expectation on both sides of (\ref{eq:ovx}), results in:
\begin{eqnarray}
E[\frac{1}{W}] & = & E[\sum_{i=0}^\infty\frac{(-1)^i(W-E[W])^i}{E[W]^{(i+1)}}] \nonumber\\
& = & \sum_{i=0}^\infty E[\frac{(-1)^i(W-E[W])^i}{E[W]^{(i+1)}}] \nonumber\\
& = & \sum_{i=0}^\infty\frac{(-1)^iE[(W-E[W])^i]}{E[W]^{(i+1)}} \nonumber\\
& = & \frac{1}{E[W]}+\frac{Var(W)}{E[W]^3} + \sum_{i=3}^\infty\frac{(-1)^iE[(w-E[W])^i]}{E[W]^{(i+1)}} \nonumber\\
& \approx & \frac{1}{E[W]}+\frac{Var(W)}{E[W]^3} \nonumber\\
& = & \frac{1}{E[W]}(1+\frac{Var(W)}{E[W]^2})
\end{eqnarray}
The approximation holds when $E[W]^{(i+1)} \gg E[(W-E[W])^i]$.
\section{The variance of $W^{TD}$} \label{ap: va}
Using similar assumptions as in the previous analysis, from (\ref{eq:al}), we know
\begin{equation}
Var[\alpha] = \frac{1-p}{p^2} \label{eq:val}
\end{equation}
Thus from (\ref{eq:yi}):
\begin{eqnarray}
Var[Y] & = & \frac{1-p}{p^2} + Var[W^{TD}] \label{eq:left}
\end{eqnarray}
Using (\ref{eq:wi}), we get the auto-correlation at the zero point\footnote{This is equal to $E[X^2]$.}:
\begin{eqnarray}
R_w(0) & = & \frac{R_w(0)}{4} + \frac{R_x(0)}{b^2} \nonumber \\
R_x(0) & = & \frac{3b^2}{4}R_w(0) \label{eq:xw}
\end{eqnarray}
And using (\ref{eq:awk}), (\ref{eq:smfb}) and (\ref{eq:eow}), we can compute the variance of $\beta$ as following:
\begin{eqnarray}
Var[\beta] & = & R_{\beta}(0) - {E[\beta]}^2 \nonumber \\
& = & E[\beta^2] - {E[\beta]}^2 \nonumber \\
& = & E[\sum_{k=0}^{w-1} k^2 p(\beta = k)|w] - {E[\beta]}^2 \nonumber \\
& = & E[\sum_{k=0}^{w-1}\frac{k^2 (1-p)^kp}{1-(1-p)^w}|w] - {E[\beta]}^2 \nonumber \\
& \approx & \frac{2(1-p)^2}{p^2} - {(1-p)[\sqrt{\frac{8}{3bp}}-1]}^2 \label{eq:vbeta}
\end{eqnarray}
Using (\ref{eq:fin}), we can also get:
\begin{eqnarray}
Var[Y] & = & Var[\frac{X_i}{2}(\frac{W^{TD}_{i-1}}{2}+W^{TD}_i-1)] + Var[\beta]\nonumber \\
& = & E[(\frac{X_i}{2})^2]E[(\frac{W^{TD}_{i-1}}{2}+W^{TD}_i-1)^2] \nonumber \\
& & - \big(E[\frac{X_i}{2}(\frac{W^{TD}_{i-1}}{2}+W^{TD}_i-1)] \big)^2 + Var[\beta] \nonumber\\
& = & \frac{R_x(0)}{4}\frac{5R_w(0)}{4} - {E[\frac{X}{2}]}^2 {E[\frac{W^{TD}_{i-1}}{2}+W^{TD}_i-1]}^2 \nonumber \\
& & + Var[\beta] \nonumber\\
& = & \frac{\frac{3b^2}{4}R_w(0)}{4}\frac{5R_w(0)}{4} - \frac{{E[X]}^2}{4} {[\frac{3}{2}E[W^{TD}] - 1]}^2 \nonumber \\
& & + Var[\beta] \nonumber\\
& = & \frac{15b^2}{64}{[Var[W^{TD}]+{E[W^{TD}]}^2]}^2 \nonumber \\
& & - \frac{{E[X]}^2}{4} {[\frac{3}{2}E[W^{TD}] - 1]}^2 + Var[\beta] \nonumber\\
& \approx & \frac{15b^2}{64}{[Var[W^{TD}]+\frac{8}{3bp}]}^2 \nonumber \\
& & - \frac{1}{4} {\Big(\frac{b}{2}\sqrt{\frac{8}{3bp}} + b \Big)}^2 {\Big( \frac{3}{2}\sqrt{\frac{8}{3bp}} - 1 \Big)}^2 \nonumber \\
& & + \frac{2(1-p)^2}{p^2} - {(1-p)[\sqrt{\frac{8}{3bp}}-1]}^2 \label{eq:right}
\end{eqnarray}
Combining (\ref{eq:right}) and (\ref{eq:left}), we obtain the final equation:
\begin{small}
\begin{eqnarray}
\frac{1-p}{p^2} + Var[W^{TD}] & = & \frac{15b^2}{64}{[Var[W^{TD}]+\frac{8}{3bp}]}^2 \nonumber \\
& & - \frac{1}{4} {\Big(\frac{b}{2}\sqrt{\frac{8}{3bp}} + b \Big)}^2 {\Big( \frac{3}{2}\sqrt{\frac{8}{3bp}} - 1 \Big)}^2 \nonumber \\
& & + \frac{2(1-p)^2}{p^2} - {(1-p)[\sqrt{\frac{8}{3bp}}-1]}^2 \label{eq:vfinal}
\end{eqnarray}
\end{small}
Solving (\ref{eq:vfinal}), we obtain the variance of $W^{TD}$ as follows:
\begin{equation}
Var[W^{TD}]_{p \to 0} \approx \frac{8(\sqrt{3}-1)}{3bp}
\end{equation}
% You can push biographies down or up by placing
% a \vfill before or after them. The appropriate
% use of \vfill depends on what kind of text is
% on the last page and whether or not the columns
% are being equalized.
%\vfill
% Can be used to pull up biographies so that the bottom of the last one
% is flush with the other column.
%\enlargethispage{-5in}
% that's all folks
\end{document}