Commit 1a750002 authored by Jon Grimm

[SCTP] Move rwnd accounting and I/O redrive off of the skb destructor.

When the skb was shared with Ethereal, Ethereal was sometimes the last
user and the destructor would get called on another CPU, not
knowing anything about our sock_lock.  Move our rwnd updates and I/O 
redrive out of the skb destructor.
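
In rough terms, the new pattern looks like the simplified sketch below: the skb keeps a
do-nothing destructor, and the rwnd credit is returned only on an explicit free path that
SCTP runs while it already owns the sock lock. (This is a condensed sketch; the real code
in the patch also walks the skb frag_list and splits the notification and data cases into
separate helpers.)

        /* Simplified sketch of the pattern introduced below.  Whichever
         * context drops the last skb reference (e.g. a PACKET socket
         * sniffer) hits only the stub and never touches SCTP state.
         */
        static void sctp_stub_rfree(struct sk_buff *skb)
        {
                /* Intentionally empty. */
        }

        void sctp_ulpevent_kfree_skb(struct sk_buff *skb)
        {
                struct sctp_ulpevent *event = sctp_skb2event(skb);

                /* Only data events consumed rwnd; give the credit back
                 * here, under the sock lock held by the caller.
                 */
                if (!sctp_ulpevent_is_notification(event))
                        sctp_assoc_rwnd_increase(event->asoc, skb_headlen(skb));
                sctp_association_put(event->asoc);
                kfree_skb(skb);
        }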

Also, if unable to allocate an skb for our transmission packet,
walk the packet's chunks and free the control chunks.  
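
The cleanup amounts to the loop in the hypothetical helper below (the patch itself
open-codes it in the nomem/err path of sctp_packet_transmit()):

        /* Hypothetical helper, equivalent to the new error path.  Control
         * chunks are unreliable, so drop them outright; DATA chunks stay
         * referenced by the outqueue and will be retransmitted or failed
         * from there, so they must not be freed here.
         */
        static void sctp_packet_drop_ctrl_chunks(struct sctp_packet *packet)
        {
                struct sctp_chunk *chunk;

                while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks)))
                        if (!sctp_chunk_is_data(chunk))
                                sctp_chunk_free(chunk);
        }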

Also change list_del calls to list_del_init.  Fix for real later, but this
prevents us from doing damage if we list_del twice.
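
For reference, the difference this relies on is only the state the entry is left in;
the helper below is purely illustrative and not part of the patch:

        /* Illustration only: why a repeated removal is tolerable after
         * list_del_init() but not after list_del().
         */
        static void remove_twice(struct list_head *entry)
        {
                list_del_init(entry);   /* entry now points at itself */
                list_del_init(entry);   /* no-op: unlinking a self-linked node */

                /* With plain list_del() the second call would follow stale
                 * (or poisoned) next/prev pointers and could corrupt the list.
                 */
        }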
parent 87611822
......@@ -74,7 +74,9 @@ static inline struct sctp_ulpevent *sctp_skb2event(struct sk_buff *skb)
struct sctp_ulpevent *sctp_ulpevent_new(int size, int flags, int gfp);
struct sctp_ulpevent *sctp_ulpevent_init(struct sctp_ulpevent *, int flags);
void sctp_ulpevent_free(struct sctp_ulpevent *);
void sctp_ulpevent_kfree_skb(struct sk_buff *skb);
int sctp_ulpevent_is_notification(const struct sctp_ulpevent *);
void sctp_queue_purge_ulpevents(struct sk_buff_head *list);
struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
const struct sctp_association *asoc,
......
......@@ -877,7 +877,7 @@ void sctp_assoc_migrate(struct sctp_association *assoc, struct sock *newsk)
/* Delete the association from the old endpoint's list of
* associations.
*/
list_del(&assoc->asocs);
list_del_init(&assoc->asocs);
/* Decrement the backlog value for a TCP-style socket. */
if (sctp_style(oldsk, TCP))
......
......@@ -85,7 +85,7 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
/* Release all references. */
list_for_each_safe(pos, temp, &msg->chunks) {
list_del(pos);
list_del_init(pos);
chunk = list_entry(pos, struct sctp_chunk, frag_list);
/* Check whether we _really_ need to notify. */
if (notify < 0) {
......@@ -294,7 +294,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
errout:
list_for_each_safe(pos, temp, &msg->chunks) {
list_del(pos);
list_del_init(pos);
chunk = list_entry(pos, struct sctp_chunk, frag_list);
sctp_chunk_free(chunk);
}
......
......@@ -97,6 +97,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
packet->source_port = sport;
packet->destination_port = dport;
skb_queue_head_init(&packet->chunks);
packet->size = SCTP_IP_OVERHEAD;
packet->vtag = 0;
packet->ecn_capable = 0;
packet->get_prepend_chunk = NULL;
......@@ -219,9 +220,8 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
/* Both control chunks and data chunks with TSNs are
* non-fragmentable.
*/
if (packet_empty) {
/* We no longer do refragmentation at all.
if (packet_empty || !sctp_chunk_is_data(chunk)) {
/* We no longer do re-fragmentation.
* Just fragment at the IP layer, if we
* actually hit this condition
*/
......@@ -229,7 +229,7 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
packet->ipfragok = 1;
goto append;
} else { /* !packet_empty */
} else {
retval = SCTP_XMIT_PMTU_FULL;
goto finish;
}
......@@ -283,20 +283,18 @@ int sctp_packet_transmit(struct sctp_packet *packet)
__u8 has_data = 0;
struct dst_entry *dst;
/* Do NOT generate a chunkless packet... */
if (skb_queue_empty(&packet->chunks))
/* Do NOT generate a chunkless packet. */
chunk = (struct sctp_chunk *)skb_peek(&packet->chunks);
if (unlikely(!chunk))
return err;
/* Set up convenience variables... */
chunk = (struct sctp_chunk *) (packet->chunks.next);
sk = chunk->skb->sk;
/* Allocate the new skb. */
nskb = dev_alloc_skb(packet->size);
if (!nskb) {
err = -ENOMEM;
goto out;
}
if (!nskb)
goto nomem;
/* Make sure the outbound skb has enough header room reserved. */
skb_reserve(nskb, SCTP_IP_OVERHEAD);
......@@ -468,9 +466,11 @@ int sctp_packet_transmit(struct sctp_packet *packet)
if (!nskb->dst)
goto no_route;
SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb length %d\n",
SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n",
nskb->len);
(*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok);
out:
packet->size = SCTP_IP_OVERHEAD;
return err;
......@@ -486,7 +486,20 @@ int sctp_packet_transmit(struct sctp_packet *packet)
* required.
*/
/* err = -EHOSTUNREACH; */
err:
/* Control chunks are unreliable so just drop them. DATA chunks
* will get resent or dropped later.
*/
while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks))) {
if (!sctp_chunk_is_data(chunk))
sctp_chunk_free(chunk);
}
goto out;
nomem:
err = -ENOMEM;
printk("%s alloc_skb failed.\n", __FUNCTION__);
goto err;
}
/********************************************************************
......
......@@ -258,7 +258,7 @@ void sctp_outq_teardown(struct sctp_outq *q)
/* Throw away chunks that have been gap ACKed. */
list_for_each_safe(lchunk, temp, &q->sacked) {
list_del(lchunk);
list_del_init(lchunk);
chunk = list_entry(lchunk, struct sctp_chunk,
transmitted_list);
sctp_datamsg_fail(chunk, q->error);
......@@ -267,7 +267,7 @@ void sctp_outq_teardown(struct sctp_outq *q)
/* Throw away any chunks in the retransmit queue. */
list_for_each_safe(lchunk, temp, &q->retransmit) {
list_del(lchunk);
list_del_init(lchunk);
chunk = list_entry(lchunk, struct sctp_chunk,
transmitted_list);
sctp_datamsg_fail(chunk, q->error);
......@@ -445,7 +445,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
/* Move the chunk to the retransmit queue. The chunks
* on the retransmit queue is always kept in order.
*/
list_del(lchunk);
list_del_init(lchunk);
sctp_retransmit_insert(lchunk, q);
}
}
......@@ -1007,7 +1007,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
struct sctp_association *asoc = q->asoc;
struct sctp_transport *transport;
struct sctp_chunk *tchunk;
struct list_head *lchunk, *transport_list, *pos;
struct list_head *lchunk, *transport_list, *pos, *temp;
sctp_sack_variable_t *frags = sack->variable;
__u32 sack_ctsn, ctsn, tsn;
__u32 highest_tsn, highest_new_tsn;
......@@ -1115,14 +1115,12 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
"%p is 0x%x.\n", __FUNCTION__, asoc, ctsn);
/* Throw away stuff rotting on the sack queue. */
list_for_each(lchunk, &q->sacked) {
list_for_each_safe(lchunk, temp, &q->sacked) {
tchunk = list_entry(lchunk, struct sctp_chunk,
transmitted_list);
tsn = ntohl(tchunk->subh.data_hdr->tsn);
if (TSN_lte(tsn, ctsn)) {
lchunk = lchunk->prev;
if (TSN_lte(tsn, ctsn))
sctp_chunk_free(tchunk);
}
}
/* ii) Set rwnd equal to the newly received a_rwnd minus the
......
......@@ -1089,7 +1089,7 @@ void sctp_chunk_free(struct sctp_chunk *chunk)
{
/* Make sure that we are not on any list. */
skb_unlink((struct sk_buff *) chunk);
list_del(&chunk->transmitted_list);
list_del_init(&chunk->transmitted_list);
/* Release our reference on the message tracker. */
if (chunk->msg)
......@@ -1850,7 +1850,7 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
/* Release the transport structures. */
list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
transport = list_entry(pos, struct sctp_transport, transports);
list_del(pos);
list_del_init(pos);
sctp_transport_free(transport);
}
nomem:
......
......@@ -768,8 +768,8 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
}
/* Clean up any skbs sitting on the receive queue. */
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sctp_sk(sk)->pd_lobby);
sctp_queue_purge_ulpevents(&sk->sk_receive_queue);
sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby);
/* On a TCP-style socket, block for at most linger_time if set. */
if (sctp_style(sk, TCP) && timeout)
......@@ -1354,7 +1354,7 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
msg->msg_flags &= ~MSG_EOR;
out_free:
sctp_ulpevent_free(event); /* Free the skb. */
sctp_ulpevent_kfree_skb(skb); /* Free the skb. */
out:
sctp_release_sock(sk);
return err;
......
......@@ -117,6 +117,7 @@ struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
INIT_LIST_HEAD(&peer->transmitted);
INIT_LIST_HEAD(&peer->send_ready);
INIT_LIST_HEAD(&peer->transports);
sctp_packet_init(&peer->packet, peer, 0, 0);
/* Set up the retransmission timer. */
init_timer(&peer->T3_rtx_timer);
......
......@@ -82,6 +82,7 @@ struct sctp_ulpevent *sctp_ulpevent_init(struct sctp_ulpevent *event,
return event;
}
/* Dispose of an event. */
void sctp_ulpevent_free(struct sctp_ulpevent *event)
{
......@@ -336,7 +337,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
/* Copy the skb to a new skb with room for us to prepend
* notification with.
*/
skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error),
skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error),
0, gfp);
/* Pull off the rest of the cause TLV from the chunk. */
......@@ -502,7 +503,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo));
/* Per TSVWG discussion with Randy. Allow the application to
* ressemble a fragmented message.
* ressemble a fragmented message.
*/
ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags;
......@@ -596,7 +597,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event(
* 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
*/
struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
struct sctp_chunk *chunk,
struct sctp_chunk *chunk,
int gfp)
{
struct sctp_ulpevent *event;
......@@ -793,6 +794,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
*
* The association id field, holds the identifier for the association.
*/
sctp_ulpevent_set_owner(skb, asoc);
pd->pdapi_assoc_id = sctp_assoc2id(asoc);
return event;
......@@ -824,11 +826,25 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
}
}
/* Stub skb destructor. */
static void sctp_stub_rfree(struct sk_buff *skb)
{
/* WARNING: This function is just a warning not to use the
* skb destructor. If the skb is shared, we may get the destructor
* callback on some processor that does not own the sock_lock. This
* was occuring with PACKET socket applications that were monitoring
* our skbs. We can't take the sock_lock, because we can't risk
* recursing if we do really own the sock lock. Instead, do all
* of our rwnd manipulation while we own the sock_lock outright.
*/
}
/* Do accounting for bytes just read by user. */
static void sctp_rcvmsg_rfree(struct sk_buff *skb)
{
struct sctp_association *asoc;
struct sctp_ulpevent *event;
struct sk_buff *frag;
/* Current stack structures assume that the rcv buffer is
* per socket. For UDP style sockets this is not true as
......@@ -836,9 +852,16 @@ static void sctp_rcvmsg_rfree(struct sk_buff *skb)
* Use the local private area of the skb to track the owning
* association.
*/
event = sctp_skb2event(skb);
asoc = event->asoc;
sctp_assoc_rwnd_increase(asoc, skb_headlen(skb));
/* Don't forget the fragments. */
for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
/* NOTE: skb_shinfos are recursive. */
sctp_rcvmsg_rfree(frag);
}
sctp_association_put(asoc);
}
......@@ -859,8 +882,7 @@ static void sctp_ulpevent_set_owner_r(struct sk_buff *skb,
event = sctp_skb2event(skb);
event->asoc = asoc;
skb->destructor = sctp_rcvmsg_rfree;
skb->destructor = sctp_stub_rfree;
sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
}
......@@ -868,7 +890,6 @@ static void sctp_ulpevent_set_owner_r(struct sk_buff *skb,
static void sctp_ulpevent_rfree(struct sk_buff *skb)
{
struct sctp_ulpevent *event;
event = sctp_skb2event(skb);
sctp_association_put(event->asoc);
}
......@@ -888,5 +909,28 @@ static void sctp_ulpevent_set_owner(struct sk_buff *skb,
skb->sk = asoc->base.sk;
event = sctp_skb2event(skb);
event->asoc = (struct sctp_association *)asoc;
skb->destructor = sctp_ulpevent_rfree;
skb->destructor = sctp_stub_rfree;
}
/* Free a ulpevent that has an owner. See comments in
* sctp_stub_rfree().
*/
void sctp_ulpevent_kfree_skb(struct sk_buff *skb)
{
struct sctp_ulpevent *event;
event = sctp_skb2event(skb);
if (sctp_ulpevent_is_notification(event))
sctp_ulpevent_rfree(skb);
else
sctp_rcvmsg_rfree(skb);
kfree_skb(skb);
}
/* Purge the skb lists holding ulpevents. */
void sctp_queue_purge_ulpevents(struct sk_buff_head *list)
{
struct sk_buff *skb;
while ((skb = skb_dequeue(list)) != NULL)
sctp_ulpevent_kfree_skb(skb);
}
......@@ -99,12 +99,12 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
while ((skb = __skb_dequeue(&ulpq->lobby))) {
event = sctp_skb2event(skb);
sctp_ulpevent_free(event);
sctp_ulpevent_kfree_skb(skb);
}
while ((skb = __skb_dequeue(&ulpq->reasm))) {
event = sctp_skb2event(skb);
sctp_ulpevent_free(event);
sctp_ulpevent_kfree_skb(skb);
}
}
......@@ -235,9 +235,9 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
out_free:
if (sctp_event2skb(event)->list)
skb_queue_purge(sctp_event2skb(event)->list);
sctp_queue_purge_ulpevents(sctp_event2skb(event)->list);
else
kfree_skb(sctp_event2skb(event));
sctp_ulpevent_kfree_skb(sctp_event2skb(event));
return 0;
}
......@@ -289,7 +289,7 @@ static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
* payload was fragmented on the way and ip had to reassemble them.
* We add the rest of skb's to the first skb's fraglist.
*/
static inline struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag)
static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag)
{
struct sk_buff *pos;
struct sctp_ulpevent *event;
......@@ -325,11 +325,10 @@ static inline struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *
/* Remove the fragment from the reassembly queue. */
__skb_unlink(pos, pos->list);
/* Break if we have reached the last fragment. */
if (pos == l_frag)
break;
pos->next = pnext;
pos = pnext;
};
......@@ -697,7 +696,7 @@ static __u16 sctp_ulpq_renege_order(struct sctp_ulpq *ulpq, __u16 needed)
event = sctp_skb2event(skb);
tsn = event->sndrcvinfo.sinfo_tsn;
sctp_ulpevent_free(event);
sctp_ulpevent_kfree_skb(skb);
sctp_tsnmap_renege(tsnmap, tsn);
if (freed >= needed)
return freed;
......@@ -723,7 +722,7 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed)
event = sctp_skb2event(skb);
tsn = event->sndrcvinfo.sinfo_tsn;
sctp_ulpevent_free(event);
sctp_ulpevent_kfree_skb(skb);
sctp_tsnmap_renege(tsnmap, tsn);
if (freed >= needed)
return freed;
......