Skip to content

Commit 40ba3cd

Browse files
Erik Hugne authored and davem330 committed
tipc: message reassembly using fragment chain
When the first fragment of a long data message is received on a link, a reassembly buffer large enough to hold the data from this and all subsequent fragments of the message is allocated. The payload of each new fragment is copied into this buffer upon arrival. When the last fragment is received, the reassembled message is delivered upwards to the port/socket layer. Not only is this an inefficient approach, but it may also cause bursts of reassembly failures in low memory situations, since we may fail to allocate the necessary large buffer in the first place. Furthermore, after 100 subsequent such failures the link will be reset, something that in reality aggravates the situation. To remedy this problem, this patch introduces a different approach. Instead of allocating a big reassembly buffer, we now append the arriving fragments to a reassembly chain on the link, and deliver the whole chain up to the socket layer once the last fragment has been received. This is safe because the retransmission layer of a TIPC link always delivers packets in strict uninterrupted order, to the reassembly layer as to all other upper layers. Hence there can never be more than one fragment chain pending reassembly at any given time in a link, and we can trust (but still verify) that the fragments will be chained up in the correct order. Signed-off-by: Erik Hugne <[email protected]> Reviewed-by: Paul Gortmaker <[email protected]> Signed-off-by: Jon Maloy <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 528f6f4 commit 40ba3cd

File tree

6 files changed

+74
-142
lines changed

6 files changed

+74
-142
lines changed

net/tipc/bcast.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -480,15 +480,19 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
480480
tipc_node_unlock(node);
481481
tipc_link_recv_bundle(buf);
482482
} else if (msg_user(msg) == MSG_FRAGMENTER) {
483-
int ret = tipc_link_recv_fragment(&node->bclink.defragm,
484-
&buf, &msg);
485-
if (ret < 0)
483+
int ret;
484+
ret = tipc_link_recv_fragment(&node->bclink.reasm_head,
485+
&node->bclink.reasm_tail,
486+
&buf);
487+
if (ret == LINK_REASM_ERROR)
486488
goto unlock;
487489
spin_lock_bh(&bc_lock);
488490
bclink_accept_pkt(node, seqno);
489491
bcl->stats.recv_fragments++;
490-
if (ret > 0) {
492+
if (ret == LINK_REASM_COMPLETE) {
491493
bcl->stats.recv_fragmented++;
494+
/* Point msg to inner header */
495+
msg = buf_msg(buf);
492496
spin_unlock_bh(&bc_lock);
493497
goto receive;
494498
}

net/tipc/link.c

Lines changed: 44 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -404,15 +404,9 @@ static void link_release_outqueue(struct tipc_link *l_ptr)
404404
*/
405405
void tipc_link_reset_fragments(struct tipc_link *l_ptr)
406406
{
407-
struct sk_buff *buf = l_ptr->defragm_buf;
408-
struct sk_buff *next;
409-
410-
while (buf) {
411-
next = buf->next;
412-
kfree_skb(buf);
413-
buf = next;
414-
}
415-
l_ptr->defragm_buf = NULL;
407+
kfree_skb(l_ptr->reasm_head);
408+
l_ptr->reasm_head = NULL;
409+
l_ptr->reasm_tail = NULL;
416410
}
417411

418412
/**
@@ -1649,13 +1643,15 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
16491643
continue;
16501644
case MSG_FRAGMENTER:
16511645
l_ptr->stats.recv_fragments++;
1652-
ret = tipc_link_recv_fragment(&l_ptr->defragm_buf,
1653-
&buf, &msg);
1654-
if (ret == 1) {
1646+
ret = tipc_link_recv_fragment(&l_ptr->reasm_head,
1647+
&l_ptr->reasm_tail,
1648+
&buf);
1649+
if (ret == LINK_REASM_COMPLETE) {
16551650
l_ptr->stats.recv_fragmented++;
1651+
msg = buf_msg(buf);
16561652
goto deliver;
16571653
}
1658-
if (ret == -1)
1654+
if (ret == LINK_REASM_ERROR)
16591655
l_ptr->next_in_no--;
16601656
tipc_node_unlock(n_ptr);
16611657
continue;
@@ -2342,115 +2338,48 @@ static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
23422338
return dsz;
23432339
}
23442340

2345-
/*
2346-
* A pending message being re-assembled must store certain values
2347-
* to handle subsequent fragments correctly. The following functions
2348-
* help storing these values in unused, available fields in the
2349-
* pending message. This makes dynamic memory allocation unnecessary.
2350-
*/
2351-
static void set_long_msg_seqno(struct sk_buff *buf, u32 seqno)
2352-
{
2353-
msg_set_seqno(buf_msg(buf), seqno);
2354-
}
2355-
2356-
static u32 get_fragm_size(struct sk_buff *buf)
2357-
{
2358-
return msg_ack(buf_msg(buf));
2359-
}
2360-
2361-
static void set_fragm_size(struct sk_buff *buf, u32 sz)
2362-
{
2363-
msg_set_ack(buf_msg(buf), sz);
2364-
}
2365-
2366-
static u32 get_expected_frags(struct sk_buff *buf)
2367-
{
2368-
return msg_bcast_ack(buf_msg(buf));
2369-
}
2370-
2371-
static void set_expected_frags(struct sk_buff *buf, u32 exp)
2372-
{
2373-
msg_set_bcast_ack(buf_msg(buf), exp);
2374-
}
2375-
23762341
/*
23772342
* tipc_link_recv_fragment(): Called with node lock on. Returns
23782343
* the reassembled buffer if message is complete.
23792344
*/
2380-
int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
2381-
struct tipc_msg **m)
2382-
{
2383-
struct sk_buff *prev = NULL;
2384-
struct sk_buff *fbuf = *fb;
2385-
struct tipc_msg *fragm = buf_msg(fbuf);
2386-
struct sk_buff *pbuf = *pending;
2387-
u32 long_msg_seq_no = msg_long_msgno(fragm);
2388-
2389-
*fb = NULL;
2390-
2391-
/* Is there an incomplete message waiting for this fragment? */
2392-
while (pbuf && ((buf_seqno(pbuf) != long_msg_seq_no) ||
2393-
(msg_orignode(fragm) != msg_orignode(buf_msg(pbuf))))) {
2394-
prev = pbuf;
2395-
pbuf = pbuf->next;
2396-
}
2397-
2398-
if (!pbuf && (msg_type(fragm) == FIRST_FRAGMENT)) {
2399-
struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm);
2400-
u32 msg_sz = msg_size(imsg);
2401-
u32 fragm_sz = msg_data_sz(fragm);
2402-
u32 exp_fragm_cnt;
2403-
u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE;
2404-
2405-
if (msg_type(imsg) == TIPC_MCAST_MSG)
2406-
max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE;
2407-
if (fragm_sz == 0 || msg_size(imsg) > max) {
2408-
kfree_skb(fbuf);
2409-
return 0;
2410-
}
2411-
exp_fragm_cnt = msg_sz / fragm_sz + !!(msg_sz % fragm_sz);
2412-
pbuf = tipc_buf_acquire(msg_size(imsg));
2413-
if (pbuf != NULL) {
2414-
pbuf->next = *pending;
2415-
*pending = pbuf;
2416-
skb_copy_to_linear_data(pbuf, imsg,
2417-
msg_data_sz(fragm));
2418-
/* Prepare buffer for subsequent fragments. */
2419-
set_long_msg_seqno(pbuf, long_msg_seq_no);
2420-
set_fragm_size(pbuf, fragm_sz);
2421-
set_expected_frags(pbuf, exp_fragm_cnt - 1);
2422-
} else {
2423-
pr_debug("Link unable to reassemble fragmented message\n");
2424-
kfree_skb(fbuf);
2425-
return -1;
2426-
}
2427-
kfree_skb(fbuf);
2428-
return 0;
2429-
} else if (pbuf && (msg_type(fragm) != FIRST_FRAGMENT)) {
2430-
u32 dsz = msg_data_sz(fragm);
2431-
u32 fsz = get_fragm_size(pbuf);
2432-
u32 crs = ((msg_fragm_no(fragm) - 1) * fsz);
2433-
u32 exp_frags = get_expected_frags(pbuf) - 1;
2434-
skb_copy_to_linear_data_offset(pbuf, crs,
2435-
msg_data(fragm), dsz);
2436-
kfree_skb(fbuf);
2437-
2438-
/* Is message complete? */
2439-
if (exp_frags == 0) {
2440-
if (prev)
2441-
prev->next = pbuf->next;
2442-
else
2443-
*pending = pbuf->next;
2444-
msg_reset_reroute_cnt(buf_msg(pbuf));
2445-
*fb = pbuf;
2446-
*m = buf_msg(pbuf);
2447-
return 1;
2448-
}
2449-
set_expected_frags(pbuf, exp_frags);
2345+
int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail,
2346+
struct sk_buff **fbuf)
2347+
{
2348+
struct sk_buff *frag = *fbuf;
2349+
struct tipc_msg *msg = buf_msg(frag);
2350+
u32 fragid = msg_type(msg);
2351+
bool headstolen;
2352+
int delta;
2353+
2354+
skb_pull(frag, msg_hdr_sz(msg));
2355+
if (fragid == FIRST_FRAGMENT) {
2356+
if (*head || skb_unclone(frag, GFP_ATOMIC))
2357+
goto out_free;
2358+
*head = frag;
2359+
skb_frag_list_init(*head);
24502360
return 0;
2361+
} else if (skb_try_coalesce(*head, frag, &headstolen, &delta)) {
2362+
kfree_skb_partial(frag, headstolen);
2363+
} else {
2364+
if (!*head)
2365+
goto out_free;
2366+
if (!skb_has_frag_list(*head))
2367+
skb_shinfo(*head)->frag_list = frag;
2368+
else
2369+
(*tail)->next = frag;
2370+
*tail = frag;
2371+
(*head)->truesize += frag->truesize;
2372+
}
2373+
if (fragid == LAST_FRAGMENT) {
2374+
*fbuf = *head;
2375+
*tail = *head = NULL;
2376+
return LINK_REASM_COMPLETE;
24512377
}
2452-
kfree_skb(fbuf);
24532378
return 0;
2379+
out_free:
2380+
pr_warn_ratelimited("Link unable to reassemble fragmented message\n");
2381+
kfree_skb(*fbuf);
2382+
return LINK_REASM_ERROR;
24542383
}
24552384

24562385
static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance)

net/tipc/link.h

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@
4040
#include "msg.h"
4141
#include "node.h"
4242

43+
/*
44+
* Link reassembly status codes
45+
*/
46+
#define LINK_REASM_ERROR -1
47+
#define LINK_REASM_COMPLETE 1
48+
4349
/*
4450
* Out-of-range value for link sequence numbers
4551
*/
@@ -134,7 +140,8 @@ struct tipc_stats {
134140
* @next_out: ptr to first unsent outbound message in queue
135141
* @waiting_ports: linked list of ports waiting for link congestion to abate
136142
* @long_msg_seq_no: next identifier to use for outbound fragmented messages
137-
* @defragm_buf: list of partially reassembled inbound message fragments
143+
* @reasm_head: list head of partially reassembled inbound message fragments
144+
* @reasm_tail: last fragment received
138145
* @stats: collects statistics regarding link activity
139146
*/
140147
struct tipc_link {
@@ -196,9 +203,10 @@ struct tipc_link {
196203
struct sk_buff *next_out;
197204
struct list_head waiting_ports;
198205

199-
/* Fragmentation/defragmentation */
206+
/* Fragmentation/reassembly */
200207
u32 long_msg_seq_no;
201-
struct sk_buff *defragm_buf;
208+
struct sk_buff *reasm_head;
209+
struct sk_buff *reasm_tail;
202210

203211
/* Statistics */
204212
struct tipc_stats stats;
@@ -229,9 +237,9 @@ int tipc_link_send_sections_fast(struct tipc_port *sender,
229237
struct iovec const *msg_sect,
230238
unsigned int len, u32 destnode);
231239
void tipc_link_recv_bundle(struct sk_buff *buf);
232-
int tipc_link_recv_fragment(struct sk_buff **pending,
233-
struct sk_buff **fb,
234-
struct tipc_msg **msg);
240+
int tipc_link_recv_fragment(struct sk_buff **reasm_head,
241+
struct sk_buff **reasm_tail,
242+
struct sk_buff **fbuf);
235243
void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, int prob,
236244
u32 gap, u32 tolerance, u32 priority,
237245
u32 acked_mtu);

net/tipc/msg.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -554,12 +554,6 @@ static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n)
554554
msg_set_bits(m, 4, 16, 0xffff, n);
555555
}
556556

557-
558-
static inline u32 msg_fragm_no(struct tipc_msg *m)
559-
{
560-
return msg_bits(m, 4, 16, 0xffff);
561-
}
562-
563557
static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
564558
{
565559
msg_set_bits(m, 4, 16, 0xffff, n);
@@ -576,12 +570,6 @@ static inline void msg_set_next_sent(struct tipc_msg *m, u32 n)
576570
msg_set_bits(m, 4, 0, 0xffff, n);
577571
}
578572

579-
580-
static inline u32 msg_long_msgno(struct tipc_msg *m)
581-
{
582-
return msg_bits(m, 4, 0, 0xffff);
583-
}
584-
585573
static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
586574
{
587575
msg_set_bits(m, 4, 0, 0xffff, n);

net/tipc/node.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,10 @@ static void node_lost_contact(struct tipc_node *n_ptr)
298298
}
299299
n_ptr->bclink.deferred_size = 0;
300300

301-
if (n_ptr->bclink.defragm) {
302-
kfree_skb(n_ptr->bclink.defragm);
303-
n_ptr->bclink.defragm = NULL;
301+
if (n_ptr->bclink.reasm_head) {
302+
kfree_skb(n_ptr->bclink.reasm_head);
303+
n_ptr->bclink.reasm_head = NULL;
304+
n_ptr->bclink.reasm_tail = NULL;
304305
}
305306

306307
tipc_bclink_remove_node(n_ptr->addr);

net/tipc/node.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@
7474
* @deferred_size: number of OOS b'cast messages in deferred queue
7575
* @deferred_head: oldest OOS b'cast message received from node
7676
* @deferred_tail: newest OOS b'cast message received from node
77-
* @defragm: list of partially reassembled b'cast message fragments from node
77+
* @reasm_head: broadcast reassembly queue head from node
78+
* @reasm_tail: last broadcast fragment received from node
7879
* @recv_permitted: true if node is allowed to receive b'cast messages
7980
*/
8081
struct tipc_node {
@@ -98,7 +99,8 @@ struct tipc_node {
9899
u32 deferred_size;
99100
struct sk_buff *deferred_head;
100101
struct sk_buff *deferred_tail;
101-
struct sk_buff *defragm;
102+
struct sk_buff *reasm_head;
103+
struct sk_buff *reasm_tail;
102104
bool recv_permitted;
103105
} bclink;
104106
};

0 commit comments

Comments
 (0)