- #! /bin/sh
- #!/usr/bin/tclsh
- #
- # Copyright (C) 2002 by USC/ISI
- # All rights reserved.
- #
- # Redistribution and use in source and binary forms are permitted
- # provided that the above copyright notice and this paragraph are
- # duplicated in all such forms and that any documentation, advertising
- # materials, and other materials related to such distribution and use
- # acknowledge that the software was developed by the University of
- # Southern California, Information Sciences Institute. The name of the
- # University may not be used to endorse or promote products derived from
- # this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
- # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- #
- # RAMP is a set of scripts that take tcpdump trace as input and
- # output a set of CDF files that model Web traffic, FTP traffic
- # and the underlying topology information including RTT and
- # bottleneck bandwidth. It also outputs a time series of
- # traffic size (in 1ms block) for further wavelet scaling analysis
- # for detailed explanation of RAMP, please see
- # http://www.isi.edu/~kclan/paper/ramp.pdf
- #
- # usage:
- # ./RAMP [-c | -f] <tcpdump trace> <threshold> <network prefix>
- #
- # [-f] : generate multiple traffic models
- # based on filter specified in model.conf
- # [-c] : convert trace file from CoralReef to
- # tcpdump format
- # <tcpdump trace> : tcpdump trace file generated using
- # tcpdump -w option
- # <threshold> : the threshold time value (in millisecond)
- # that distinguishes idle periods in order
- # to infer user "think" times between
- # requests for new top-level pages.
- # <network prefix>: network prefix used to distinguish
- # inbound vs. outbound traffic
- #
- # example: ./RAMP tracefile 1000 128.9.0.0/255.255.0.0
- #
- # Currently it has only been tested on Linux systems
- #
- # Here is the version of tcpdump we used for testing
- # tcpdump version 3.6.3
- # libpcap version 0.6
- #
- # We assume the output of tcpdump with the following format for
- # TCP packets
- #
- # timestamp src > dst: flags data-seqno ack window urgent options
- #
- # Note that some versions of tcpdump print an extra "<",
- # i.e.
- # timestamp < src > dst: flags data-seqno ack window urgent options
- #
- # which is not compatible with our code.
- # One simple workaround is to use the provided remove.pl script
- # to remove the extra "<" from the tcpdump output
- #
- # This work is supported by DARPA through SAMAN Project
- # (http://www.isi.edu/saman/), administered by the Space and Naval
- # Warfare System Center San Diego under Contract No. N66001-00-C-8066
- #
- #
- #
- # ---- Command-line handling -------------------------------------------
- # Accepted forms:
- #   RAMP    <tcpdump trace> <threshold> <network prefix>
- #   RAMP -c <tcpdump trace> <threshold> <network prefix>
- #   RAMP -f <tcpdump trace> <threshold> <network prefix>
- # Sets: f (trace file), t (threshold), p (network prefix),
- #       filter (yes when -f was given, otherwise no).
- #
- # Start the log with an empty line. Plain "echo" is used because the
- # -e option is not portable: a /bin/sh whose echo lacks it prints "-e".
- echo
- crl=no
- # Initialise explicitly so that a "filter" variable inherited from the
- # environment cannot switch the script into filter mode.
- filter=no
- if test "$1" = -c
- then
-   if [ -x ./crl_to_pcap ]
-   then
-     # Keep the CoralReef original as <trace>.crl and write the converted
-     # file under the original trace name.
-     mv -- "$2" "$2.crl" || exit 1
-     ./crl_to_pcap -o "$2" "$2.crl"
-     f=$2
-     t=$3
-     p=$4
-   else
-     echo "CoralReef to Tcpdump converter does not exist!!" >&2
-     exit 1
-   fi
- elif test "$1" = -f
- then
-   if [ -e ./model.conf ]
-   then
-     f=$2
-     t=$3
-     p=$4
-     filter=yes
-   else
-     echo "model.conf does not exist!!" >&2
-     exit 1
-   fi
- else
-   f=$1
-   t=$2
-   p=$3
- fi
- # Every later step needs all three values; stop early instead of running
- # the whole pipeline with empty file names.
- if [ -z "$f" ] || [ -z "$t" ] || [ -z "$p" ]
- then
-   echo "usage: $0 [-c | -f] <tcpdump trace> <threshold> <network prefix>" >&2
-   exit 1
- fi
- # ---- Pre-process the trace with tcpdump ------------------------------
- # Each pass reads the trace "$f" and writes a text dump next to it:
- #   $f.all       all packets, brief (-q) output
- #   $f.tcp       TCP packets
- #   $f.www       TCP packets on port 80
- #   $f.ftp1      packets on the ftp port
- #   $f.ftp2      output of getFTPclient.pl over $f.ftp1
- #   $f.ftp       output of getFTP.pl over $f.tcp, given $f.ftp2
- #   $f.http-srv  TCP packets with source port 80
- # The tcpdump binary may be overridden through the TCPDUMP environment
- # variable; /usr/sbin/tcpdump remains the default.
- TCPDUMP=${TCPDUMP:-/usr/sbin/tcpdump}
- #preprocessing the trace file
- date
- # Report the trace name actually being parsed ($tcpdump was never set).
- echo "*** parsing $f file ***"
- echo "*** parsing .all ***"
- "$TCPDUMP" -nn -tt -q -r "$f" > "${f}.all"
- # The filter expression goes after the options so that it does not
- # depend on getopt permuting arguments.
- "$TCPDUMP" -nn -tt -r "$f" tcp > "${f}.tcp"
- date
- echo "*** parsing .www ***"
- "$TCPDUMP" -nn -tt -r "$f" tcp port 80 > "${f}.www"
- #extract FTP data connections from the trace
- date
- echo "*** parsing .ftp ***"
- "$TCPDUMP" -nn -tt -r "$f" port ftp > "${f}.ftp1"
- getftpnode.pl < "${f}.ftp1"
- #find the IP address of FTP clients
- getFTPclient.pl < "${f}.ftp1" > "${f}.ftp2"
- getFTP.pl -r "${f}.ftp2" < "${f}.tcp" > "${f}.ftp"
- #extract tcp flows from Web server
- date
- echo "*** parsing .http-srv ***"
- "$TCPDUMP" -nn -tt -r "$f" tcp src port 80 > "${f}.http-srv"
- # ---- Disabled: traffic-mix and flow statistics -----------------------
- # The commands below are kept commented out, exactly as found.
- #date
- #echo "*** analyze traffic mix ***"
- #cat $f.all | io.pl -s $p -w $f.all
- #cat $f.all.inbound | traffic-classify > $f.traffic.cnt.inbound
- #cat $f.all.outbound | traffic-classify > $f.traffic.cnt.outbound
- #date
- #echo "*** analyze flow statistics ***"
- #awk -f flow.awk < $f.all.outbound > $f.all.outbound.flow
- #awk -f flow.awk < $f.all.inbound > $f.all.inbound.flow
- #sort -s -o $f.all.outbound.flow.sort -T /tmp $f.all.outbound.flow
- #sort -s -o $f.all.inbound.flow.sort -T /tmp $f.all.inbound.flow
- #cat $f.all.outbound.flow.sort | flow.pl -w $f.outbound.flow
- #cat $f.all.inbound.flow.sort | flow.pl -w $f.inbound.flow
- #sort -s -o $f.inbound.flow.start.sort $f.inbound.flow.start
- #sort -s -o $f.outbound.flow.start.sort $f.outbound.flow.start
- #awk -f arrive2inter.awk < $f.outbound.flow.start.sort > $f.outbound.flow.arrival
- #awk -f arrive2inter.awk < $f.inbound.flow.start.sort > $f.inbound.flow.arrival
- #dat2cdf -e 1024 -i 1024 -d 1024 -t $f.outbound.flow.size
- #dat2cdf -e 1024 -i 1024 -d 1024 -t $f.inbound.flow.size
- #dat2cdf -e 0 -i 1 -d 1 -t $f.outbound.flow.dur
- #dat2cdf -e 0 -i 1 -d 1 -t $f.inbound.flow.dur
- #dat2cdf -e 0 -i 0.001 -d 1 -t $f.outbound.flow.arrival
- #dat2cdf -e 0 -i 0.001 -d 1 -t $f.inbound.flow.arrival
- #
- # ---- Split traffic by direction --------------------------------------
- # NOTE(review): later steps read <name>.inbound and <name>.outbound, so
- # the -w argument is presumably an output prefix - confirm in io*.pl.
- # All expansions are quoted so that a trace path containing blanks or
- # glob characters is passed through intact.
- #separate inbound and outbound flows in web traffic
- date
- echo "WWW"
- io.www.pl -s "$p" -w "${f}.www" < "${f}.www"
- io.www.pl -s "$p" -w "${f}.http-srv" < "${f}.http-srv"
- #separate inbound and outbound flows in FTP traffic
- date
- echo "FTP"
- io.pl -s "$p" -w "${f}.ftp" < "${f}.ftp"
- # Filter mode (-f): hand model.conf to the model scripts and stop here;
- # none of the analysis below is run.
- if test "$filter" = yes
- then
-   wmodel.pl -r "${f}.http-srv.inbound" -t "$t" < model.conf
-   fmodel.pl -r "${f}.ftp" < model.conf
-   getftpserver.pl
-   exit 0
- fi
- ################################################################
- #please read output_format.pdf and trace_processing.pdf included in
- # this package for detailed explanation of the following commands
- #
- # Remove results left in the current directory by earlier runs.
- # The ./ prefix keeps a file whose name begins with "-" from being
- # read as an rm option.
- /bin/rm -rf ./*.time-series
- /bin/rm -rf ./*connect.time*
- date
- echo "*** Analyze Inbound traffic ***"
- #output the summary of http connections
- echo "run http_connect"
- # Stable sort on field 2, then field 4, then field 1. The -k form
- # replaces the obsolete "+1 -2 +3 -4 +0 -1" keys, which a
- # POSIX-conforming sort treats as file names.
- sort -s -T /tmp -k 2,2 -k 4,4 -k 1,1 -o "${f}.in.http-srv-sort" "${f}.http-srv.inbound"
- http_connect -r "${f}.in.http-srv-sort" -w "${f}.in.http-srv.connect"
- grep "ACT" "${f}.in.http-srv.connect" > "${f}.in.http-srv.connect.time"
- sort "${f}.in.http-srv.connect.time" > "${f}.in.http-srv.connect.time.sort"
- #output the summary of http client request and server response
- date
- echo "run http_active"
- # Stable sort on field 2, then field 1 (formerly "+1 -2 +0 -1").
- sort -s -T /tmp -k 2,2 -k 1,1 -o "${f}.in.http-srv.connect.sort" "${f}.in.http-srv.connect"
- # -I receives the threshold value given on the command line.
- http_active -r "${f}.in.http-srv.connect.sort" -w "${f}.in.http-srv.active" -I "$t"
- #output statistical distribution of web traffic
- #in particular the distributions of the following parameters
- #(a) session inter-arrival
- #(b) number of pages per session
- #(c) page inter-arrival
- #(d) page size
- #(e) object inter-arrival
- #(f) object size
- #(g) request size
- #(h) ratio between persistent and non-persistent connection
- #(i) server popularity
- date
- echo "compute CDF statistics"
- outputCDF -e inbound < "${f}.in.http-srv.active.activity"
- #output the time series of web traffic usage in 1ms block
- #for later use of wavelet scaling analysis
- date
- echo "compute time series (1ms block)"
- # NOTE(review): bw.tcl is expected to leave <input>.bw behind, which the
- # next command reads - confirm in bw.tcl.
- bw.tcl "${f}.http-srv.inbound"
- time-series.pl < "${f}.http-srv.inbound.bw" > "${f}.in.time-series"
- # ---- Outbound web traffic: same steps as for inbound traffic ---------
- date
- echo "*** Analyze Outbound traffic ***"
- #output the summary of http connections
- echo "run http_connect"
- # Stable sort on field 2, then field 4, then field 1. The -k form
- # replaces the obsolete "+1 -2 +3 -4 +0 -1" keys, which a
- # POSIX-conforming sort treats as file names.
- sort -s -T /tmp -k 2,2 -k 4,4 -k 1,1 -o "${f}.out.http-srv-sort" "${f}.http-srv.outbound"
- http_connect -r "${f}.out.http-srv-sort" -w "${f}.out.http-srv.connect"
- grep "ACT" "${f}.out.http-srv.connect" > "${f}.out.http-srv.connect.time"
- sort "${f}.out.http-srv.connect.time" > "${f}.out.http-srv.connect.time.sort"
- #output the summary of http client request and server response
- date
- echo "run http_active"
- # Stable sort on field 2, then field 1 (formerly "+1 -2 +0 -1").
- sort -s -T /tmp -k 2,2 -k 1,1 -o "${f}.out.http-srv.connect.sort" "${f}.out.http-srv.connect"
- # -I receives the threshold value given on the command line.
- http_active -r "${f}.out.http-srv.connect.sort" -w "${f}.out.http-srv.active" -I "$t"
- date
- echo "compute CDF statistics of web traffic"
- outputCDF -e outbound < "${f}.out.http-srv.active.activity"
- #output the time series of web traffic usage in 1ms block
- #for later use of wavelet scaling analysis
- date
- echo "compute time series (1ms block)"
- # NOTE(review): bw.tcl is expected to leave <input>.bw behind, which the
- # next command reads - confirm in bw.tcl.
- bw.tcl "${f}.http-srv.outbound"
- time-series.pl < "${f}.http-srv.outbound.bw" > "${f}.out.time-series"
- #################################################################
- # Delay and bottleneck-bandwidth estimation for WWW traffic.
- # NOTE(review): inbound.seq, outbound.seq, inbound.pkt.size and
- # outbound.pkt.size are not written by any redirection in this section;
- # presumably BW-seq.pl creates them in the current directory - confirm.
- echo "*** Delay and Bandwidth estimation ***"
- echo "Estimate delay and bottleneck bandwidth for WWW traffic"
- date
- echo "pre-processing: output traffic between web servers and clients"
- BW-seq.pl -s "$p" -p 80 < "${f}.www"
- sort inbound.seq -o inbound.seq.sorted
- sort outbound.seq -o outbound.seq.sorted
- date
- # The message is kept on one line; it used to be split in the middle of
- # the word "outbound", which printed a broken two-line message.
- echo "search for DATA/ACK packets which have the same seqence number for outbound traffic"
- BW-pair.pl < outbound.seq.sorted > "${f}.outbound.pair"
- date
- echo "estimate the bandwidth for inbound/outbound traffic"
- # NOTE(review): the dat2cdf inputs below ($f.www.*.BW, $f.www.outbound.delay)
- # are presumably written by BW.out.pl / BW.in.pl under the -w prefix.
- BW.out.pl -w "${f}.www" < "${f}.outbound.pair"
- BW.in.pl -w "${f}.www" < inbound.seq.sorted
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.www.outbound.BW"
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.www.inbound.BW"
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.www.outbound.delay"
- date
- echo "compute packet size distribution"
- sort -o inbound.pkt.size.sort inbound.pkt.size
- sort -o outbound.pkt.size.sort outbound.pkt.size
- awk -f pkt.awk < inbound.pkt.size.sort > "${f}.www.inbound.pktsize"
- awk -f pkt.awk < outbound.pkt.size.sort > "${f}.www.outbound.pktsize"
- dat2cdf -e 0 -i 1 -d 1 -t "${f}.www.inbound.pktsize"
- dat2cdf -e 0 -i 1 -d 1 -t "${f}.www.outbound.pktsize"
- date
- echo "Locate SYN connection"
- delay.pl -p 80 < "${f}.www" > "${f}.www.sync"
- sort -s -o "${f}.www.sync.sorted" -T /tmp "${f}.www.sync"
- date
- echo "compute delay for each SYN connection pair between servers and clients"
- pair.tcl "${f}.www.sync.sorted" "$p" > "${f}.www.sync.delay"
- sort -s -o "${f}.www.sync.delay.sorted" -T /tmp "${f}.www.sync.delay"
- awk -f delay.awk < "${f}.www.sync.delay.sorted" > "${f}.www.inbound.delay"
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.www.inbound.delay"
- ##########################################################
- # Delay and bottleneck-bandwidth estimation for FTP traffic.
- # This section writes inbound.seq.sorted, outbound.seq.sorted and
- # $f.outbound.pair again, replacing the files of the same name produced
- # for WWW traffic.
- # NOTE(review): inbound.seq / outbound.seq are presumably rewritten by
- # BW-seq-ftp.pl - confirm.
- echo "Estimate delay and bottleneck bandwidth for FTP traffic"
- date
- echo "pre-processing: output traffic between ftp servers and clients"
- BW-seq-ftp.pl -r "${f}.ftp2" -s "$p" < "${f}.ftp"
- sort inbound.seq -o inbound.seq.sorted
- sort outbound.seq -o outbound.seq.sorted
- date
- # The message is kept on one line; it used to be split in the middle of
- # the word "outbound", which printed a broken two-line message.
- echo "search for DATA/ACK packets which have the same seqence number for outbound traffic"
- BW-pair.pl < outbound.seq.sorted > "${f}.outbound.pair"
- date
- echo "estimate the bandwidth for inbound/outbound traffic"
- BW.out.pl -w "${f}.ftp" < "${f}.outbound.pair"
- BW.in.pl -w "${f}.ftp" < inbound.seq.sorted
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.ftp.outbound.BW"
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.ftp.inbound.BW"
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.ftp.outbound.delay"
- date
- echo "Locate SYN connection"
- # Port 21 here, where the WWW section passes port 80.
- delay.pl -p 21 < "${f}.ftp" > "${f}.sync"
- sort -s -o "${f}.sync.sorted" -T /tmp "${f}.sync"
- date
- echo "compute delay for each SYN connection pair between servers and clients"
- pair.tcl "${f}.sync.sorted" "$p" > "${f}.sync.delay"
- sort -s -o "${f}.sync.delay.sorted" -T /tmp "${f}.sync.delay"
- awk -f delay.awk < "${f}.sync.delay.sorted" > "${f}.ftp.inbound.delay"
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.ftp.inbound.delay"
- ###########################################################
- #compute statistical distribution of FTP traffic
- #specifically the following parameters in FTP model
- # (1) ftp file arrival
- # (2) number of files per ftp session
- # (3) file size
- #
- # All expansions are quoted so that a trace path containing blanks or
- # glob characters is passed through intact.
- # NOTE(review): the .arrive, .size and .fileno files read below are
- # presumably written by ftp.pl under the -w prefix - confirm in ftp.pl.
- echo "Output flow statistics for FTP traffic"
- date
- echo "estimate file distribution of outbound FTP traffic"
- awk -f ftp.awk < "${f}.ftp.outbound" | sort > "${f}.ftp.outbound.flow.sort"
- ftp.pl -w "${f}.ftp.outbound" < "${f}.ftp.outbound.flow.sort"
- sort -o "${f}.ftp.outbound.arrive.sort" "${f}.ftp.outbound.arrive"
- awk -f ftp.arrive.awk < "${f}.ftp.outbound.arrive.sort" > "${f}.ftp.outbound.file.inter"
- echo "estimate file distribution of inbound FTP traffic"
- awk -f ftp.awk < "${f}.ftp.inbound" | sort > "${f}.ftp.inbound.flow.sort"
- ftp.pl -w "${f}.ftp.inbound" < "${f}.ftp.inbound.flow.sort"
- sort -o "${f}.ftp.inbound.arrive.sort" "${f}.ftp.inbound.arrive"
- awk -f ftp.arrive.awk < "${f}.ftp.inbound.arrive.sort" > "${f}.ftp.inbound.file.inter"
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.ftp.outbound.file.inter"
- dat2cdf -e 0 -i 1000 -d 1000 -t "${f}.ftp.outbound.size"
- dat2cdf -e 0 -i 1 -d 1 -t "${f}.ftp.outbound.fileno"
- dat2cdf -e 0 -i 0.001 -d 1 -t "${f}.ftp.inbound.file.inter"
- dat2cdf -e 0 -i 1000 -d 1000 -t "${f}.ftp.inbound.size"
- dat2cdf -e 0 -i 1 -d 1 -t "${f}.ftp.inbound.fileno"
- ######################################
- # TCP window sizes taken from the lines containing " S ".
- # With the "src > dst:" line format described in the file header,
- #   ".PORT >" selects lines whose source port is PORT      (*.svr.win)
- #   ".PORT:"  selects lines whose destination port is PORT (*.clnt.win)
- # grep -F matches these as fixed strings. As a regular expression the
- # leading "." matched any character, so ports such as 8080 or 1080 were
- # selected as well.
- echo "Output TCP window size"
- date
- echo "WWW"
- grep " S " "${f}.www.outbound" | grep -F ".80 >" > "${f}.www.outbound.svr.win"
- grep " S " "${f}.www.inbound" | grep -F ".80 >" > "${f}.www.inbound.svr.win"
- grep " S " "${f}.www.outbound" | grep -F ".80:" > "${f}.www.outbound.clnt.win"
- grep " S " "${f}.www.inbound" | grep -F ".80:" > "${f}.www.inbound.clnt.win"
- awk -f win.awk < "${f}.www.outbound.svr.win" > "${f}.www.outbound.wins"
- awk -f win.awk < "${f}.www.inbound.svr.win" > "${f}.www.inbound.wins"
- awk -f win.awk < "${f}.www.outbound.clnt.win" > "${f}.www.outbound.winc"
- awk -f win.awk < "${f}.www.inbound.clnt.win" > "${f}.www.inbound.winc"
- dat2cdf -e 1024 -i 1024 -d 1024 -t "${f}.www.outbound.wins"
- dat2cdf -e 1024 -i 1024 -d 1024 -t "${f}.www.outbound.winc"
- dat2cdf -e 1024 -i 1024 -d 1024 -t "${f}.www.inbound.wins"
- dat2cdf -e 1024 -i 1024 -d 1024 -t "${f}.www.inbound.winc"
- echo "FTP"
- # Same selection for port 20.
- grep " S " "${f}.ftp.outbound" | grep -F ".20 >" > "${f}.ftp.outbound.svr.win"
- grep " S " "${f}.ftp.inbound" | grep -F ".20 >" > "${f}.ftp.inbound.svr.win"
- grep " S " "${f}.ftp.outbound" | grep -F ".20:" > "${f}.ftp.outbound.clnt.win"
- grep " S " "${f}.ftp.inbound" | grep -F ".20:" > "${f}.ftp.inbound.clnt.win"
- awk -f win.awk < "${f}.ftp.outbound.svr.win" > "${f}.ftp.outbound.wins"
- awk -f win.awk < "${f}.ftp.inbound.svr.win" > "${f}.ftp.inbound.wins"
- awk -f win.awk < "${f}.ftp.outbound.clnt.win" > "${f}.ftp.outbound.winc"
- awk -f win.awk < "${f}.ftp.inbound.clnt.win" > "${f}.ftp.inbound.winc"
- dat2cdf -e 1024 -i 1024 -d 1024 -t "${f}.ftp.outbound.wins"
- dat2cdf -e 1024 -i 1024 -d 1024 -t "${f}.ftp.outbound.winc"
- dat2cdf -e 1024 -i 1024 -d 1024 -t "${f}.ftp.inbound.wins"
- dat2cdf -e 1024 -i 1024 -d 1024 -t "${f}.ftp.inbound.winc"
- date
- echo "execution complete"