tcp_usrreq.c
上传用户:baixin
上传日期:2008-03-13
资源大小:4795k
文件大小:18k
- /* tcp_usrreq.c - TCP interface routines */
- /* Copyright 1984 - 2002 Wind River Systems, Inc. */
- #include "copyright_wrs.h"
- /*
- * Copyright (c) 1982, 1986, 1988, 1993, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95
- */
- /*
- modification history
- --------------------
- 03i,05jun02,vvv fixed Nagle for large writes (SPR #72213)
- 03h,21mar02,wap avoid making local port of TCP connection the same as
- the foreign port (SPR #73104)
- 03g,12oct01,rae merge from truestack ver 03j, base 03e
- 03f,14nov00,ham removed tcpstates declaration (SPR 62272).
- 03e,26aug98,n_s added return val check for mBufClGet in tcp_ctloutput.
- spr #22238.
- 03d,07jul97,vin in tcp_usrreq case PRU_DETACH simplified.
- 03c,08mar97,vin added changes to accommodate changes in pcb structure for
- hash look ups.
- 03b,22nov96,vin modified for cluster support, replace m_get(..) with
- mBufClGet(..).
- 03a,03mar96,vin created from BSD4.4 stuff,integrated with 02o of tcp_usrreq.c.
- */
- /*
- DESCRIPTION
- */
- #include "vxWorks.h"
- #include "net/mbuf.h"
- #include "sys/socket.h"
- #include "net/socketvar.h"
- #include "net/protosw.h"
- #include "errno.h"
- #include "sys/stat.h"
- #include "net/if.h"
- #include "net/route.h"
- #include "netinet/in.h"
- #include "netinet/in_pcb.h"
- #include "netinet/in_systm.h"
- #include "netinet/in_var.h"
- #include "netinet/ip.h"
- #include "netinet/ip_var.h"
- #include "netinet/tcp.h"
- #include "netinet/tcp_fsm.h"
- #include "netinet/tcp_seq.h"
- #include "netinet/tcp_timer.h"
- #include "netinet/tcp_var.h"
- #include "netinet/tcpip.h"
- #include "netinet/tcp_debug.h"
- #include "net/systm.h"
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET
- #include "wvNetLib.h"
- #endif
- #endif
- #ifdef VIRTUAL_STACK
- #include "netinet/vsLib.h"
- #else
- IMPORT int tcp_keepinit;
- #endif /* VIRTUAL_STACK */
- IMPORT unsigned long (*pTcpRandHook)(void);
- LOCAL void tcpTraceStub ();
- LOCAL void tcpReportStub ();
- VOIDFUNCPTR tcpTraceRtn = tcpTraceStub; /* exported */
- VOIDFUNCPTR tcpReportRtn = tcpReportStub; /* exported */
- /*
- * TCP protocol interface to socket abstraction.
- */
#if defined (WV_INSTRUMENTATION) && defined (INCLUDE_WVNET)

/*
 * Fields shared by every instrumentation event identifier generated in
 * this module.  They are not referenced by name in the functions below;
 * presumably the WV_NET_MARKER_x macros pick them up (see wvNetLib.h).
 */
LOCAL UCHAR wvNetModuleId = WV_NET_TCPREQ_MODULE;  /* value for tcp_usrreq.c */
LOCAL UCHAR wvNetLocalFilter = WV_NET_NONE;        /* available event filter */
LOCAL ULONG wvNetEventId;                          /* event identifier: see wvNetLib.h */

#endif /* WV_INSTRUMENTATION && INCLUDE_WVNET */
- /*
- * Process a TCP user request for TCP tb. If this is a send request
- * then m is the mbuf chain of send data. If this is a timer expiration
- * (called from the software clock routine), then timertype tells which timer.
- */
- /*ARGSUSED*/
- int
- tcp_usrreq(so, req, m, nam, control)
- struct socket *so;
- int req;
- struct mbuf *m, *nam, *control;
- {
- register struct inpcb *inp;
- register struct tcpcb *tp = NULL;
- int s;
- int error = 0;
- int ostate;
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_INFO event */
- WV_NET_MARKER_2 (NET_AUX_EVENT, WV_NET_INFO, 47, 8,
- WV_NETEVENT_TCPREQ_START, so->so_fd, req)
- #endif /* INCLUDE_WVNET */
- #endif
- if (req == PRU_CONTROL)
- return (in_control(so, (u_long)m, (caddr_t)nam,
- (struct ifnet *)control));
- if (control && control->m_len) {
- m_freem(control);
- if (m)
- m_freem(m);
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_ERROR event */
- WV_NET_MARKER_2 (NET_AUX_EVENT, WV_NET_ERROR, 38, 4,
- WV_NETEVENT_TCPREQ_BADMEM, so->so_fd, req)
- #endif /* INCLUDE_WVNET */
- #endif
- return (EINVAL);
- }
- s = splnet();
- inp = sotoinpcb(so);
- /*
- * When a TCP is attached to a socket, then there will be
- * a (struct inpcb) pointed at by the socket, and this
- * structure will point at a subsidary (struct tcpcb).
- */
- if (inp == 0 && req != PRU_ATTACH) {
- splx(s);
- #if 0
- /*
- * The following corrects an mbuf leak under rare
- * circumstances, but has not been fully tested.
- */
- if (m && req != PRU_SENSE)
- m_freem(m);
- #else
- /* safer version of fix for mbuf leak */
- if (m && (req == PRU_SEND || req == PRU_SENDOOB))
- m_freem(m);
- #endif
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_ERROR event */
- WV_NET_MARKER_2 (NET_AUX_EVENT, WV_NET_ERROR, 39, 5,
- WV_NETEVENT_TCPREQ_NOPCB, so->so_fd, req)
- #endif /* INCLUDE_WVNET */
- #endif
- return (EINVAL); /* XXX */
- }
- if (inp) {
- tp = intotcpcb(inp);
- /* WHAT IF TP IS 0? */
- #ifdef KPROF
- tcp_acounts[tp->t_state][req]++;
- #endif
- ostate = tp->t_state;
- } else
- ostate = 0;
- switch (req) {
- /*
- * TCP attaches to socket via PRU_ATTACH, reserving space,
- * and an internet control block.
- */
- case PRU_ATTACH:
- if (inp) {
- error = EISCONN;
- break;
- }
- error = tcp_attach(so);
- if (error)
- break;
- /* TCP sockets use path MTU discovery by default. */
- so->so_options |= SO_USEPATHMTU;
- if ((so->so_options & SO_LINGER) && so->so_linger == 0)
- so->so_linger = TCP_LINGERTIME;
- tp = sototcpcb(so);
- break;
- /*
- * PRU_DETACH detaches the TCP protocol from the socket.
- * If the protocol state is non-embryonic, then can't
- * do this directly: have to initiate a PRU_DISCONNECT,
- * which may finish later; embryonic TCB's can just
- * be discarded here.
- */
- case PRU_DETACH:
- tp = tcp_disconnect(tp);
- break;
- /*
- * Give the socket an address.
- */
- case PRU_BIND:
- error = in_pcbbind(inp, nam);
- if (error)
- break;
- break;
- /*
- * Prepare to accept connections.
- */
- case PRU_LISTEN:
- if (inp->inp_lport == 0)
- error = in_pcbbind(inp, (struct mbuf *)0);
- if (error == 0)
- tp->t_state = TCPS_LISTEN;
- break;
- /*
- * Initiate connection to peer.
- * Create a template for use in transmissions on this connection.
- * Enter SYN_SENT state, and mark socket as connecting.
- * Start keep-alive timer, and seed output sequence space.
- * Send initial segment on connection.
- */
- case PRU_CONNECT:
- if (inp->inp_lport == 0) {
- error = in_pcbbind(inp, (struct mbuf *)0);
- if (error)
- break;
- /*
- * Avoid case where ephemeral port for local
- * side of connection is the same as destination
- * port for foreign side.
- */
- if (inp->inp_lport ==
- (mtod (nam, struct sockaddr_in *))->sin_port)
- {
- inp->inp_lport = 0;
- error = in_pcbbind (inp, (struct mbuf *)0);
- if (error)
- break;
- }
- }
- error = in_pcbconnect(inp, nam);
- if (error)
- break;
- tp->t_template = tcp_template(tp);
- if (tp->t_template == 0) {
- in_pcbdisconnect(inp);
- error = ENOBUFS;
- break;
- }
- /* Compute window scaling to request. */
- while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
- (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
- tp->request_r_scale++;
- soisconnecting(so);
- tcpstat.tcps_connattempt++;
- tp->t_state = TCPS_SYN_SENT;
- tp->t_timer[TCPT_KEEP] = tcp_keepinit;
- tp->iss = tcp_iss;
- tcp_iss += TCP_ISSINCR/4 + ((0x0000ffff) & (pTcpRandHook() >> 16));
- tcp_sendseqinit(tp);
- error = tcp_output(tp);
- break;
- /*
- * Create a TCP connection between two sockets.
- */
- case PRU_CONNECT2:
- error = EOPNOTSUPP;
- break;
- /*
- * Initiate disconnect from peer.
- * If connection never passed embryonic stage, just drop;
- * else if don't need to let data drain, then can just drop anyways,
- * else have to begin TCP shutdown process: mark socket disconnecting,
- * drain unread data, state switch to reflect user close, and
- * send segment (e.g. FIN) to peer. Socket will be really disconnected
- * when peer sends FIN and acks ours.
- *
- * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
- */
- case PRU_DISCONNECT:
- tp = tcp_disconnect(tp);
- break;
- /*
- * Accept a connection. Essentially all the work is
- * done at higher levels; just return the address
- * of the peer, storing through addr.
- */
- case PRU_ACCEPT:
- in_setpeeraddr(inp, nam);
- break;
- /*
- * Mark the connection as being incapable of further output.
- */
- case PRU_SHUTDOWN:
- socantsendmore(so);
- tp = tcp_usrclosed(tp);
- if (tp)
- error = tcp_output(tp);
- break;
- /*
- * After a receive, possibly send window update to peer.
- */
- case PRU_RCVD:
- (void) tcp_output(tp);
- break;
- /*
- * Do a send by putting data in output queue and updating urgent
- * marker if URG set. Possibly send more data.
- */
- case PRU_SEND:
- if (m->m_flags & M_EOB)
- {
- m->m_flags &= ~M_EOB;
- tp->t_flags |= TF_EOB;
- }
- sbappend(&so->so_snd, m);
- error = tcp_output(tp);
- tp->t_flags &= ~TF_EOB;
- break;
- /*
- * Abort the TCP.
- */
- case PRU_ABORT:
- tp = tcp_drop(tp, ECONNABORTED);
- break;
- case PRU_SENSE:
- ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
- (void) splx(s);
- return (0);
- case PRU_RCVOOB:
- if ((so->so_oobmark == 0 &&
- (so->so_state & SS_RCVATMARK) == 0) ||
- so->so_options & SO_OOBINLINE ||
- tp->t_oobflags & TCPOOB_HADDATA) {
- error = EINVAL;
- break;
- }
- if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
- error = EWOULDBLOCK;
- break;
- }
- m->m_len = 1;
- *mtod(m, caddr_t) = tp->t_iobc;
- if (((int)nam & MSG_PEEK) == 0)
- tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
- break;
- case PRU_SENDOOB:
- if (sbspace(&so->so_snd) < -512) {
- m_freem(m);
- error = ENOBUFS;
- break;
- }
- /*
- * According to RFC961 (Assigned Protocols),
- * the urgent pointer points to the last octet
- * of urgent data. We continue, however,
- * to consider it to indicate the first octet
- * of data past the urgent section.
- * Otherwise, snd_up should be one lower.
- */
- sbappend(&so->so_snd, m);
- tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
- tp->t_force = 1;
- error = tcp_output(tp);
- tp->t_force = 0;
- break;
- case PRU_SOCKADDR:
- in_setsockaddr(inp, nam);
- break;
- case PRU_PEERADDR:
- in_setpeeraddr(inp, nam);
- break;
- /*
- * TCP slow timer went off; going through this
- * routine for tracing's sake.
- */
- case PRU_SLOWTIMO:
- tp = tcp_timers(tp, (int)nam);
- req |= (int)nam << 8; /* for debug's sake */
- break;
- default:
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_EMERGENCY event */
- WV_NET_MARKER_2 (NET_AUX_EVENT, WV_NET_EMERGENCY, 31, 1,
- WV_NETEVENT_TCPREQ_PANIC, so->so_fd, req)
- #endif /* INCLUDE_WVNET */
- #endif
- panic("tcp_usrreq");
- }
- #ifdef BSDDEBUG
- if (tp && (so->so_options & SO_DEBUG))
- (*tcpTraceRtn)(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
- #endif /* BSDDEBUG */
- splx(s);
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_ERROR event */
- if (error)
- {
- WV_NET_MARKER_3 (NET_AUX_EVENT, WV_NET_ERROR, 40, 6,
- WV_NETEVENT_TCPREQ_FAIL, so->so_fd, req, error)
- }
- #endif /* INCLUDE_WVNET */
- #endif
- return (error);
- }
- int
- tcp_ctloutput(op, so, level, optname, mp)
- int op;
- struct socket *so;
- int level, optname;
- struct mbuf **mp;
- {
- int error = 0, s;
- struct inpcb *inp;
- register struct tcpcb *tp;
- register struct mbuf *m;
- register int i;
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_INFO event */
- WV_NET_MARKER_4 (NET_AUX_EVENT, WV_NET_INFO, 46, 7,
- WV_NETEVENT_TCPCTLOUT_START, so->so_fd, op, level, optname)
- #endif /* INCLUDE_WVNET */
- #endif
- s = splnet();
- inp = sotoinpcb(so);
- if (inp == NULL) {
- splx(s);
- if (op == PRCO_SETOPT && *mp)
- (void) m_free(*mp);
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_CRITICAL event */
- WV_NET_MARKER_4 (NET_AUX_EVENT, WV_NET_CRITICAL, 32, 2,
- WV_NETEVENT_TCPCTLOUT_NOPCB, so->so_fd, op, level, optname)
- #endif /* INCLUDE_WVNET */
- #endif
- return (ECONNRESET);
- }
- if (level != IPPROTO_TCP) {
- error = ip_ctloutput(op, so, level, optname, mp);
- splx(s);
- return (error);
- }
- tp = intotcpcb(inp);
- switch (op) {
- case PRCO_SETOPT:
- m = *mp;
- switch (optname) {
- case TCP_NODELAY:
- if (m == NULL || m->m_len < sizeof (int))
- error = EINVAL;
- else if (*mtod(m, int *))
- tp->t_flags |= TF_NODELAY;
- else
- tp->t_flags &= ~TF_NODELAY;
- break;
- case TCP_MAXSEG:
- if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
- tp->t_maxseg = i;
- else
- error = EINVAL;
- break;
- default:
- error = ENOPROTOOPT;
- break;
- }
- if (m)
- (void) m_free(m);
- break;
- case PRCO_GETOPT:
- *mp = m = mBufClGet(M_WAIT, MT_SOOPTS, CL_SIZE_128, TRUE);
- if (m == (struct mbuf *) NULL)
- {
- error = ENOBUFS;
- break;
- }
- m->m_len = sizeof(int);
- switch (optname) {
- case TCP_NODELAY:
- *mtod(m, int *) = tp->t_flags & TF_NODELAY;
- break;
- case TCP_MAXSEG:
- *mtod(m, int *) = tp->t_maxseg;
- break;
- default:
- error = ENOPROTOOPT;
- break;
- }
- break;
- }
- splx(s);
- return (error);
- }
- #ifndef VIRTUAL_STACK
- u_long tcp_sendspace = 1024*8;
- u_long tcp_recvspace = 1024*8;
- #endif
- /*
- * Attach TCP protocol to socket, allocating
- * internet protocol control block, tcp control block,
- * bufer space, and entering LISTEN state if to accept connections.
- */
- int
- tcp_attach(so)
- struct socket *so;
- {
- register struct tcpcb *tp;
- struct inpcb *inp;
- int error;
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_VERBOSE event */
- WV_NET_MARKER_1 (NET_AUX_EVENT, WV_NET_VERBOSE, 41, 9,
- WV_NETEVENT_TCPATTACH_START, so->so_fd)
- #endif /* INCLUDE_WVNET */
- #endif
- if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
- error = soreserve(so, tcp_sendspace, tcp_recvspace);
- if (error)
- return (error);
- }
- error = in_pcballoc(so, &tcbinfo);
- if (error)
- return (error);
- inp = sotoinpcb(so);
- tp = tcp_newtcpcb(inp);
- if (tp == 0) {
- int nofd = so->so_state & SS_NOFDREF; /* XXX */
- so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
- in_pcbdetach(inp);
- so->so_state |= nofd;
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_CRITICAL event */
- WV_NET_MARKER_1 (NET_AUX_EVENT, WV_NET_CRITICAL, 33, 3,
- WV_NETEVENT_TCPATTACH_NOBUFS, so->so_fd)
- #endif /* INCLUDE_WVNET */
- #endif
- return (ENOBUFS);
- }
- tp->t_state = TCPS_CLOSED;
- return (0);
- }
- /*
- * Initiate (or continue) disconnect.
- * If embryonic state, just send reset (once).
- * If in ``let data drain'' option and linger null, just drop.
- * Otherwise (hard), mark socket disconnecting and drop
- * current input data; switch states based on user close, and
- * send segment to peer (with FIN).
- */
- struct tcpcb *
- tcp_disconnect(tp)
- register struct tcpcb *tp;
- {
- struct socket *so = tp->t_inpcb->inp_socket;
- #ifdef WV_INSTRUMENTATION
- #ifdef INCLUDE_WVNET /* WV_NET_VERBOSE event */
- WV_NET_MARKER_1 (NET_AUX_EVENT, WV_NET_VERBOSE, 42, 10,
- WV_NETEVENT_TCPDISCONN_START, so->so_fd)
- #endif /* INCLUDE_WVNET */
- #endif
- if (tp->t_state < TCPS_ESTABLISHED)
- tp = tcp_close(tp);
- else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
- tp = tcp_drop(tp, 0);
- else {
- soisdisconnecting(so);
- sbflush(&so->so_rcv);
- tp = tcp_usrclosed(tp);
- if (tp)
- (void) tcp_output(tp);
- }
- return (tp);
- }
- /*
- * User issued close, and wish to trail through shutdown states:
- * if never received SYN, just forget it. If got a SYN from peer,
- * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
- * If already got a FIN from peer, then almost done; go to LAST_ACK
- * state. In all other cases, have already sent FIN to peer (e.g.
- * after PRU_SHUTDOWN), and just have to play tedious game waiting
- * for peer to send FIN or not respond to keep-alives, etc.
- * We can let the user exit from the close as soon as the FIN is acked.
- */
- struct tcpcb *
- tcp_usrclosed(tp)
- register struct tcpcb *tp;
- {
- switch (tp->t_state) {
- case TCPS_CLOSED:
- case TCPS_LISTEN:
- case TCPS_SYN_SENT:
- tp->t_state = TCPS_CLOSED;
- tp = tcp_close(tp);
- break;
- case TCPS_SYN_RECEIVED:
- case TCPS_ESTABLISHED:
- tp->t_state = TCPS_FIN_WAIT_1;
- break;
- case TCPS_CLOSE_WAIT:
- tp->t_state = TCPS_LAST_ACK;
- break;
- }
- if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
- soisdisconnected(tp->t_inpcb->inp_socket);
- return (tp);
- }
- LOCAL void tcpTraceStub ()
- {
- }
- LOCAL void tcpReportStub ()
- {
- }