Commit 54f47c5d authored by Julian Anastasov, committed by Arnaldo Carvalho de Melo

[IPVS]: Properly handle non-linear skbs.

Most of the changes come from Paul `Rusty' Russell. Now we
modify the skbs only for IPVS packets.
parent 3a9a3e7d
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#include <asm/types.h> /* For __uXX types */ #include <asm/types.h> /* For __uXX types */
#define IP_VS_VERSION_CODE 0x010107 #define IP_VS_VERSION_CODE 0x010108
#define NVERSION(version) \ #define NVERSION(version) \
(version >> 16) & 0xFF, \ (version >> 16) & 0xFF, \
(version >> 8) & 0xFF, \ (version >> 8) & 0xFF, \
...@@ -272,22 +272,22 @@ extern int ip_vs_get_debug_level(void); ...@@ -272,22 +272,22 @@ extern int ip_vs_get_debug_level(void);
if (net_ratelimit()) \ if (net_ratelimit()) \
printk(KERN_DEBUG "IPVS: " msg); \ printk(KERN_DEBUG "IPVS: " msg); \
} while (0) } while (0)
#define IP_VS_DBG_PKT(level, pp, iph, msg) \ #define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) \
do { \ do { \
if (level <= ip_vs_get_debug_level()) \ if (level <= ip_vs_get_debug_level()) \
pp->debug_packet(pp, iph, msg); \ pp->debug_packet(pp, skb, ofs, msg); \
} while (0) } while (0)
#define IP_VS_DBG_RL_PKT(level, pp, iph, msg) \ #define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) \
do { \ do { \
if (level <= ip_vs_get_debug_level() && \ if (level <= ip_vs_get_debug_level() && \
net_ratelimit()) \ net_ratelimit()) \
pp->debug_packet(pp, iph, msg); \ pp->debug_packet(pp, skb, ofs, msg); \
} while (0) } while (0)
#else /* NO DEBUGGING at ALL */ #else /* NO DEBUGGING at ALL */
#define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0)
#define IP_VS_DBG_RL(msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0)
#define IP_VS_DBG_PKT(level, pp, iph, msg) do {} while (0) #define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0)
#define IP_VS_DBG_RL_PKT(level, pp, iph, msg) do {} while (0) #define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) do {} while (0)
#endif #endif
#define IP_VS_BUG() BUG() #define IP_VS_BUG() BUG()
...@@ -395,18 +395,6 @@ enum { ...@@ -395,18 +395,6 @@ enum {
IP_VS_ICMP_S_LAST, IP_VS_ICMP_S_LAST,
}; };
/*
* Transport protocol header
*/
union ip_vs_tphdr {
unsigned char *raw;
struct udphdr *uh;
struct tcphdr *th;
struct icmphdr *icmph;
__u16 *portp;
};
/* /*
* Delta sequence info structure * Delta sequence info structure
* Each ip_vs_conn has 2 (output AND input seq. changes). * Each ip_vs_conn has 2 (output AND input seq. changes).
...@@ -459,36 +447,36 @@ struct ip_vs_protocol { ...@@ -459,36 +447,36 @@ struct ip_vs_protocol {
void (*exit)(struct ip_vs_protocol *pp); void (*exit)(struct ip_vs_protocol *pp);
int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_protocol *pp, int (*conn_schedule)(struct sk_buff *skb,
struct iphdr *iph, union ip_vs_tphdr h, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp); int *verdict, struct ip_vs_conn **cpp);
struct ip_vs_conn * struct ip_vs_conn *
(*conn_in_get)(struct sk_buff *skb, (*conn_in_get)(const struct sk_buff *skb,
struct ip_vs_protocol *pp, struct iphdr *iph, struct ip_vs_protocol *pp,
union ip_vs_tphdr h, int inverse); const struct iphdr *iph,
unsigned int proto_off,
int inverse);
struct ip_vs_conn * struct ip_vs_conn *
(*conn_out_get)(struct sk_buff *skb, (*conn_out_get)(const struct sk_buff *skb,
struct ip_vs_protocol *pp, struct iphdr *iph, struct ip_vs_protocol *pp,
union ip_vs_tphdr h, int inverse); const struct iphdr *iph,
unsigned int proto_off,
int inverse);
int (*snat_handler)(struct sk_buff *skb, int (*snat_handler)(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
struct iphdr *iph, union ip_vs_tphdr h, int size);
int (*dnat_handler)(struct sk_buff *skb, int (*dnat_handler)(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
struct iphdr *iph, union ip_vs_tphdr h, int size);
int (*csum_check)(struct sk_buff *skb, int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp);
struct ip_vs_protocol *pp, struct iphdr *iph,
union ip_vs_tphdr h, int size);
const char *(*state_name)(int state); const char *(*state_name)(int state);
int (*state_transition)(struct ip_vs_conn *cp, int direction, int (*state_transition)(struct ip_vs_conn *cp, int direction,
struct iphdr *iph, union ip_vs_tphdr h, const struct sk_buff *skb,
struct ip_vs_protocol *pp); struct ip_vs_protocol *pp);
int (*register_app)(struct ip_vs_app *inc); int (*register_app)(struct ip_vs_app *inc);
...@@ -497,8 +485,10 @@ struct ip_vs_protocol { ...@@ -497,8 +485,10 @@ struct ip_vs_protocol {
int (*app_conn_bind)(struct ip_vs_conn *cp); int (*app_conn_bind)(struct ip_vs_conn *cp);
void (*debug_packet)(struct ip_vs_protocol *pp, struct iphdr *iph, void (*debug_packet)(struct ip_vs_protocol *pp,
char *msg); const struct sk_buff *skb,
int offset,
const char *msg);
void (*timeout_change)(struct ip_vs_protocol *pp, int flags); void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
...@@ -638,7 +628,7 @@ struct ip_vs_scheduler { ...@@ -638,7 +628,7 @@ struct ip_vs_scheduler {
/* selecting a server from the given service */ /* selecting a server from the given service */
struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc, struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
struct iphdr *iph); const struct sk_buff *skb);
}; };
...@@ -660,13 +650,13 @@ struct ip_vs_app ...@@ -660,13 +650,13 @@ struct ip_vs_app
__u16 port; /* port number in net order */ __u16 port; /* port number in net order */
atomic_t usecnt; /* usage counter */ atomic_t usecnt; /* usage counter */
/* output hook */ /* output hook: return false if can't linearize. diff set for TCP. */
int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
struct sk_buff *); struct sk_buff **, int *diff);
/* input hook */ /* input hook: return false if can't linearize. diff set for TCP. */
int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
struct sk_buff *); struct sk_buff **, int *diff);
/* ip_vs_app initializer */ /* ip_vs_app initializer */
int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *); int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);
...@@ -686,20 +676,21 @@ struct ip_vs_app ...@@ -686,20 +676,21 @@ struct ip_vs_app
int timeouts_size; int timeouts_size;
int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_app *app, int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_app *app,
struct iphdr *iph, union ip_vs_tphdr h,
int *verdict, struct ip_vs_conn **cpp); int *verdict, struct ip_vs_conn **cpp);
struct ip_vs_conn * struct ip_vs_conn *
(*conn_in_get)(struct sk_buff *skb, struct ip_vs_app *app, (*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app,
struct iphdr *iph, union ip_vs_tphdr h, int inverse); const struct iphdr *iph, unsigned int proto_off,
int inverse);
struct ip_vs_conn * struct ip_vs_conn *
(*conn_out_get)(struct sk_buff *skb, struct ip_vs_app *app, (*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app,
struct iphdr *iph, union ip_vs_tphdr h, int inverse); const struct iphdr *iph, unsigned int proto_off,
int inverse);
int (*state_transition)(struct ip_vs_conn *cp, int direction, int (*state_transition)(struct ip_vs_conn *cp, int direction,
struct iphdr *iph, const struct sk_buff *skb,
union ip_vs_tphdr h, struct ip_vs_app *app); struct ip_vs_app *app);
void (*timeout_change)(struct ip_vs_app *app, int flags); void (*timeout_change)(struct ip_vs_app *app, int flags);
}; };
...@@ -839,8 +830,8 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port); ...@@ -839,8 +830,8 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
extern int ip_vs_app_inc_get(struct ip_vs_app *inc); extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
extern void ip_vs_app_inc_put(struct ip_vs_app *inc); extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff **pskb);
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff **pskb);
extern int ip_vs_skb_replace(struct sk_buff *skb, int pri, extern int ip_vs_skb_replace(struct sk_buff *skb, int pri,
char *o_buf, int o_len, char *n_buf, int n_len); char *o_buf, int o_len, char *n_buf, int n_len);
extern int ip_vs_app_init(void); extern int ip_vs_app_init(void);
...@@ -856,6 +847,10 @@ extern void ip_vs_protocol_timeout_change(int flags); ...@@ -856,6 +847,10 @@ extern void ip_vs_protocol_timeout_change(int flags);
extern int *ip_vs_create_timeout_table(int *table, int size); extern int *ip_vs_create_timeout_table(int *table, int size);
extern int extern int
ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to); ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to);
extern void
ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg);
extern struct ip_vs_protocol ip_vs_protocol_tcp; extern struct ip_vs_protocol ip_vs_protocol_tcp;
extern struct ip_vs_protocol ip_vs_protocol_udp; extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_icmp; extern struct ip_vs_protocol ip_vs_protocol_icmp;
...@@ -875,9 +870,9 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); ...@@ -875,9 +870,9 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc);
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn * extern struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct iphdr *iph); ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb);
extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp, union ip_vs_tphdr h); struct ip_vs_protocol *pp);
/* /*
...@@ -940,7 +935,7 @@ extern int ip_vs_tunnel_xmit ...@@ -940,7 +935,7 @@ extern int ip_vs_tunnel_xmit
extern int ip_vs_dr_xmit extern int ip_vs_dr_xmit
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern int ip_vs_icmp_xmit extern int ip_vs_icmp_xmit
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
extern void ip_vs_dst_reset(struct ip_vs_dest *dest); extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
...@@ -986,6 +981,11 @@ extern __inline__ char ip_vs_fwd_tag(struct ip_vs_conn *cp) ...@@ -986,6 +981,11 @@ extern __inline__ char ip_vs_fwd_tag(struct ip_vs_conn *cp)
return fwd; return fwd;
} }
extern int ip_vs_make_skb_writable(struct sk_buff **pskb, int len);
extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int dir);
extern u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
static inline u16 ip_vs_check_diff(u32 old, u32 new, u16 oldsum) static inline u16 ip_vs_check_diff(u32 old, u32 new, u16 oldsum)
{ {
......
...@@ -362,29 +362,18 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, ...@@ -362,29 +362,18 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
spin_unlock(&cp->lock); spin_unlock(&cp->lock);
} }
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
/* struct ip_vs_app *app)
* Output pkt hook. Will call bound ip_vs_app specific function
* called by ipvs packet handler, assumes previously checked cp!=NULL
* returns (new - old) skb->len diff.
*/
int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
{ {
struct ip_vs_app *app;
int diff; int diff;
struct iphdr *iph; unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
struct tcphdr *th; struct tcphdr *th;
__u32 seq; __u32 seq;
/* if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
* check if application module is bound to
* this ip_vs_conn.
*/
if ((app = cp->app) == NULL)
return 0; return 0;
iph = skb->nh.iph; th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
/* /*
* Remember seq number in case this pkt gets resized * Remember seq number in case this pkt gets resized
...@@ -394,54 +383,72 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) ...@@ -394,54 +383,72 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
/* /*
* Fix seq stuff if flagged as so. * Fix seq stuff if flagged as so.
*/ */
if (cp->protocol == IPPROTO_TCP) {
if (cp->flags & IP_VS_CONN_F_OUT_SEQ) if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
vs_fix_seq(&cp->out_seq, th); vs_fix_seq(&cp->out_seq, th);
if (cp->flags & IP_VS_CONN_F_IN_SEQ) if (cp->flags & IP_VS_CONN_F_IN_SEQ)
vs_fix_ack_seq(&cp->in_seq, th); vs_fix_ack_seq(&cp->in_seq, th);
}
/* /*
* Call private output hook function * Call private output hook function
*/ */
if (app->pkt_out == NULL) if (app->pkt_out == NULL)
return 0; return 1;
diff = app->pkt_out(app, cp, skb); if (!app->pkt_out(app, cp, pskb, &diff))
return 0;
/* /*
* Update ip_vs seq stuff if len has changed. * Update ip_vs seq stuff if len has changed.
*/ */
if (diff != 0 && cp->protocol == IPPROTO_TCP) if (diff != 0)
vs_seq_update(cp, &cp->out_seq, vs_seq_update(cp, &cp->out_seq,
IP_VS_CONN_F_OUT_SEQ, seq, diff); IP_VS_CONN_F_OUT_SEQ, seq, diff);
return diff; return 1;
} }
/* /*
* Input pkt hook. Will call bound ip_vs_app specific function * Output pkt hook. Will call bound ip_vs_app specific function
* called by ipvs packet handler, assumes previously checked cp!=NULL. * called by ipvs packet handler, assumes previously checked cp!=NULL
* returns (new - old) skb->len diff. * returns false if it can't handle packet (oom)
*/ */
int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
{ {
struct ip_vs_app *app; struct ip_vs_app *app;
int diff;
struct iphdr *iph;
struct tcphdr *th;
__u32 seq;
/* /*
* check if application module is bound to * check if application module is bound to
* this ip_vs_conn. * this ip_vs_conn.
*/ */
if ((app = cp->app) == NULL) if ((app = cp->app) == NULL)
return 1;
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
return app_tcp_pkt_out(cp, pskb, app);
/*
* Call private output hook function
*/
if (app->pkt_out == NULL)
return 1;
return app->pkt_out(app, cp, pskb, NULL);
}
static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
struct ip_vs_app *app)
{
int diff;
unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
struct tcphdr *th;
__u32 seq;
if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
return 0; return 0;
iph = skb->nh.iph; th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
/* /*
* Remember seq number in case this pkt gets resized * Remember seq number in case this pkt gets resized
...@@ -451,29 +458,57 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) ...@@ -451,29 +458,57 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
/* /*
* Fix seq stuff if flagged as so. * Fix seq stuff if flagged as so.
*/ */
if (cp->protocol == IPPROTO_TCP) {
if (cp->flags & IP_VS_CONN_F_IN_SEQ) if (cp->flags & IP_VS_CONN_F_IN_SEQ)
vs_fix_seq(&cp->in_seq, th); vs_fix_seq(&cp->in_seq, th);
if (cp->flags & IP_VS_CONN_F_OUT_SEQ) if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
vs_fix_ack_seq(&cp->out_seq, th); vs_fix_ack_seq(&cp->out_seq, th);
}
/* /*
* Call private input hook function * Call private input hook function
*/ */
if (app->pkt_in == NULL) if (app->pkt_in == NULL)
return 0; return 1;
diff = app->pkt_in(app, cp, skb); if (!app->pkt_in(app, cp, pskb, &diff))
return 0;
/* /*
* Update ip_vs seq stuff if len has changed. * Update ip_vs seq stuff if len has changed.
*/ */
if (diff != 0 && cp->protocol == IPPROTO_TCP) if (diff != 0)
vs_seq_update(cp, &cp->in_seq, vs_seq_update(cp, &cp->in_seq,
IP_VS_CONN_F_IN_SEQ, seq, diff); IP_VS_CONN_F_IN_SEQ, seq, diff);
return diff; return 1;
}
/*
* Input pkt hook. Will call bound ip_vs_app specific function
* called by ipvs packet handler, assumes previously checked cp!=NULL.
* returns false if can't handle packet (oom).
*/
int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
{
struct ip_vs_app *app;
/*
* check if application module is bound to
* this ip_vs_conn.
*/
if ((app = cp->app) == NULL)
return 1;
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
return app_tcp_pkt_in(cp, pskb, app);
/*
* Call private input hook function
*/
if (app->pkt_in == NULL)
return 1;
return app->pkt_in(app, cp, pskb, NULL);
} }
......
This diff is collapsed.
...@@ -202,10 +202,11 @@ static inline int is_overloaded(struct ip_vs_dest *dest) ...@@ -202,10 +202,11 @@ static inline int is_overloaded(struct ip_vs_dest *dest)
* Destination hashing scheduling * Destination hashing scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_dh_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
struct ip_vs_dh_bucket *tbl; struct ip_vs_dh_bucket *tbl;
struct iphdr *iph = skb->nh.iph;
IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
......
...@@ -87,39 +87,46 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, ...@@ -87,39 +87,46 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
__u32 *addr, __u16 *port, __u32 *addr, __u16 *port,
char **start, char **end) char **start, char **end)
{ {
unsigned char p1,p2,p3,p4,p5,p6; unsigned char p[6];
int i = 0;
if (data_limit - data < plen) {
/* check if there is partial match */
if (strnicmp(data, pattern, data_limit - data) == 0)
return -1;
else
return 0;
}
while (data < data_limit) {
if (strnicmp(data, pattern, plen) != 0) { if (strnicmp(data, pattern, plen) != 0) {
data++; return 0;
continue;
} }
*start = data+plen; *start = data + plen;
p1 = simple_strtoul(data+plen, &data, 10);
if (*data != ',')
continue;
p2 = simple_strtoul(data+1, &data, 10);
if (*data != ',')
continue;
p3 = simple_strtoul(data+1, &data, 10);
if (*data != ',')
continue;
p4 = simple_strtoul(data+1, &data, 10);
if (*data != ',')
continue;
p5 = simple_strtoul(data+1, &data, 10);
if (*data != ',')
continue;
p6 = simple_strtoul(data+1, &data, 10);
if (*data != term)
continue;
for (data = *start; *data != term; data++) {
if (data == data_limit)
return -1;
}
*end = data; *end = data;
*addr = (p4<<24) | (p3<<16) | (p2<<8) | p1;
*port = (p6<<8) | p5; memset(p, 0, sizeof(p));
return 1; for (data = *start; data != *end; data++) {
if (*data >= '0' && *data <= '9') {
p[i] = p[i]*10 + *data - '0';
} else if (*data == ',' && i < 5) {
i++;
} else {
/* unexpected character */
return -1;
} }
return 0; }
if (i != 5)
return -1;
*addr = (p[3]<<24) | (p[2]<<16) | (p[1]<<8) | p[0];
*port = (p[5]<<8) | p[4];
return 1;
} }
...@@ -136,8 +143,8 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, ...@@ -136,8 +143,8 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
* "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)". * "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
* xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number. * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
*/ */
static int ip_vs_ftp_out(struct ip_vs_app *app, static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct ip_vs_conn *cp, struct sk_buff *skb) struct sk_buff **pskb, int *diff)
{ {
struct iphdr *iph; struct iphdr *iph;
struct tcphdr *th; struct tcphdr *th;
...@@ -148,24 +155,30 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, ...@@ -148,24 +155,30 @@ static int ip_vs_ftp_out(struct ip_vs_app *app,
struct ip_vs_conn *n_cp; struct ip_vs_conn *n_cp;
char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
unsigned buf_len; unsigned buf_len;
int diff; int ret;
*diff = 0;
/* Only useful for established sessions */ /* Only useful for established sessions */
if (cp->state != IP_VS_TCP_S_ESTABLISHED) if (cp->state != IP_VS_TCP_S_ESTABLISHED)
return 1;
/* Linear packets are much easier to deal with. */
if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
return 0; return 0;
if (cp->app_data == &ip_vs_ftp_pasv) { if (cp->app_data == &ip_vs_ftp_pasv) {
iph = skb->nh.iph; iph = (*pskb)->nh.iph;
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
data = (char *)th + (th->doff << 2); data = (char *)th + (th->doff << 2);
data_limit = skb->tail; data_limit = (*pskb)->tail;
if (ip_vs_ftp_get_addrport(data, data_limit, if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING, SERVER_STRING,
sizeof(SERVER_STRING)-1, ')', sizeof(SERVER_STRING)-1, ')',
&from, &port, &from, &port,
&start, &end) == 0) &start, &end) != 1)
return 0; return 1;
IP_VS_DBG(1-debug, "PASV response (%u.%u.%u.%u:%d) -> " IP_VS_DBG(1-debug, "PASV response (%u.%u.%u.%u:%d) -> "
"%u.%u.%u.%u:%d detected\n", "%u.%u.%u.%u:%d detected\n",
...@@ -196,29 +209,29 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, ...@@ -196,29 +209,29 @@ static int ip_vs_ftp_out(struct ip_vs_app *app,
from = n_cp->vaddr; from = n_cp->vaddr;
port = n_cp->vport; port = n_cp->vport;
sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
port&255, port>>8&255); port&255, (port>>8)&255);
buf_len = strlen(buf); buf_len = strlen(buf);
/* /*
* Calculate required delta-offset to keep TCP happy * Calculate required delta-offset to keep TCP happy
*/ */
diff = buf_len - (end-start); *diff = buf_len - (end-start);
if (diff == 0) { if (*diff == 0) {
/* simply replace it with new passive address */ /* simply replace it with new passive address */
memcpy(start, buf, buf_len); memcpy(start, buf, buf_len);
ret = 1;
} else { } else {
/* fixme: return value isn't checked here */ ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
ip_vs_skb_replace(skb, GFP_ATOMIC, start,
end-start, buf, buf_len); end-start, buf, buf_len);
} }
cp->app_data = NULL; cp->app_data = NULL;
ip_vs_tcp_conn_listen(n_cp); ip_vs_tcp_conn_listen(n_cp);
ip_vs_conn_put(n_cp); ip_vs_conn_put(n_cp);
return diff; return ret;
} }
return 0; return 1;
} }
...@@ -233,8 +246,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, ...@@ -233,8 +246,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app,
* port, so that the active ftp data connection from the server can reach * port, so that the active ftp data connection from the server can reach
* the client. * the client.
*/ */
static int ip_vs_ftp_in(struct ip_vs_app *app, static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct ip_vs_conn *cp, struct sk_buff *skb) struct sk_buff **pskb, int *diff)
{ {
struct iphdr *iph; struct iphdr *iph;
struct tcphdr *th; struct tcphdr *th;
...@@ -244,29 +257,37 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, ...@@ -244,29 +257,37 @@ static int ip_vs_ftp_in(struct ip_vs_app *app,
__u16 port; __u16 port;
struct ip_vs_conn *n_cp; struct ip_vs_conn *n_cp;
/* no diff required for incoming packets */
*diff = 0;
/* Only useful for established sessions */ /* Only useful for established sessions */
if (cp->state != IP_VS_TCP_S_ESTABLISHED) if (cp->state != IP_VS_TCP_S_ESTABLISHED)
return 1;
/* Linear packets are much easier to deal with. */
if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
return 0; return 0;
/* /*
* Detecting whether it is passive * Detecting whether it is passive
*/ */
iph = skb->nh.iph; iph = (*pskb)->nh.iph;
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
/* Since there may be OPTIONS in the TCP packet and the HLEN is /* Since there may be OPTIONS in the TCP packet and the HLEN is
the length of the header in 32-bit multiples, it is accurate the length of the header in 32-bit multiples, it is accurate
to calculate data address by th+HLEN*4 */ to calculate data address by th+HLEN*4 */
data = data_start = (char *)th + (th->doff << 2); data = data_start = (char *)th + (th->doff << 2);
data_limit = skb->tail; data_limit = (*pskb)->tail;
while (data < data_limit) { while (data <= data_limit - 6) {
if (strnicmp(data, "PASV\r\n", 6) == 0) { if (strnicmp(data, "PASV\r\n", 6) == 0) {
/* Passive mode on */
IP_VS_DBG(1-debug, "got PASV at %d of %d\n", IP_VS_DBG(1-debug, "got PASV at %d of %d\n",
data - data_start, data - data_start,
data_limit - data_start); data_limit - data_start);
cp->app_data = &ip_vs_ftp_pasv; cp->app_data = &ip_vs_ftp_pasv;
return 0; return 1;
} }
data++; data++;
} }
...@@ -278,28 +299,28 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, ...@@ -278,28 +299,28 @@ static int ip_vs_ftp_in(struct ip_vs_app *app,
* then create a new connection entry for the coming data * then create a new connection entry for the coming data
* connection. * connection.
*/ */
data = data_start; if (ip_vs_ftp_get_addrport(data_start, data_limit,
data_limit = skb->h.raw + skb->len - 18;
if (ip_vs_ftp_get_addrport(data, data_limit,
CLIENT_STRING, sizeof(CLIENT_STRING)-1, CLIENT_STRING, sizeof(CLIENT_STRING)-1,
'\r', &to, &port, '\r', &to, &port,
&start, &end) == 0) &start, &end) != 1)
return 0; return 1;
IP_VS_DBG(1-debug, "PORT %u.%u.%u.%u:%d detected\n", IP_VS_DBG(1-debug, "PORT %u.%u.%u.%u:%d detected\n",
NIPQUAD(to), ntohs(port)); NIPQUAD(to), ntohs(port));
/* Passive mode off */
cp->app_data = NULL;
/* /*
* Now update or create a connection entry for it * Now update or create a connection entry for it
*/ */
IP_VS_DBG(1-debug, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", IP_VS_DBG(1-debug, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
ip_vs_proto_name(iph->protocol), ip_vs_proto_name(iph->protocol),
NIPQUAD(to), ntohs(port), NIPQUAD(iph->daddr), 0); NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0);
n_cp = ip_vs_conn_in_get(iph->protocol, n_cp = ip_vs_conn_in_get(iph->protocol,
to, port, to, port,
iph->daddr, htons(ntohs(cp->vport)-1)); cp->vaddr, htons(ntohs(cp->vport)-1));
if (!n_cp) { if (!n_cp) {
n_cp = ip_vs_conn_new(IPPROTO_TCP, n_cp = ip_vs_conn_new(IPPROTO_TCP,
to, port, to, port,
...@@ -320,8 +341,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, ...@@ -320,8 +341,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app,
ip_vs_tcp_conn_listen(n_cp); ip_vs_tcp_conn_listen(n_cp);
ip_vs_conn_put(n_cp); ip_vs_conn_put(n_cp);
/* no diff required for incoming packets */ return 1;
return 0;
} }
......
...@@ -523,11 +523,12 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) ...@@ -523,11 +523,12 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
* Locality-Based (weighted) Least-Connection scheduling * Locality-Based (weighted) Least-Connection scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
struct ip_vs_lblc_table *tbl; struct ip_vs_lblc_table *tbl;
struct ip_vs_lblc_entry *en; struct ip_vs_lblc_entry *en;
struct iphdr *iph = skb->nh.iph;
IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
......
...@@ -777,11 +777,12 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) ...@@ -777,11 +777,12 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
* Locality-Based (weighted) Least-Connection scheduling * Locality-Based (weighted) Least-Connection scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
struct ip_vs_lblcr_table *tbl; struct ip_vs_lblcr_table *tbl;
struct ip_vs_lblcr_entry *en; struct ip_vs_lblcr_entry *en;
struct iphdr *iph = skb->nh.iph;
IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
......
...@@ -63,7 +63,7 @@ ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) ...@@ -63,7 +63,7 @@ ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
* Least Connection scheduling * Least Connection scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_lc_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest, *least = NULL; struct ip_vs_dest *dest, *least = NULL;
unsigned int loh = 0, doh; unsigned int loh = 0, doh;
......
...@@ -79,7 +79,7 @@ ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) ...@@ -79,7 +79,7 @@ ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
* Weighted Least Connection scheduling * Weighted Least Connection scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_nq_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest, *least = NULL; struct ip_vs_dest *dest, *least = NULL;
unsigned int loh = 0, doh; unsigned int loh = 0, doh;
......
...@@ -164,22 +164,33 @@ const char * ip_vs_state_name(__u16 proto, int state) ...@@ -164,22 +164,33 @@ const char * ip_vs_state_name(__u16 proto, int state)
void void
tcpudp_debug_packet(struct ip_vs_protocol *pp, struct iphdr *iph, char *msg) ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
const struct sk_buff *skb,
int offset,
const char *msg)
{ {
char buf[128]; char buf[128];
union ip_vs_tphdr h; __u16 ports[2];
struct iphdr iph;
h.raw = (char *) iph + iph->ihl * 4; if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0)
if (iph->frag_off & __constant_htons(IP_OFFSET)) sprintf(buf, "%s TRUNCATED", pp->name);
else if (iph.frag_off & __constant_htons(IP_OFFSET))
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
pp->name, NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); pp->name, NIPQUAD(iph.saddr),
NIPQUAD(iph.daddr));
else if (skb_copy_bits(skb, offset + iph.ihl*4, ports, sizeof(ports)) < 0)
sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u",
pp->name,
NIPQUAD(iph.saddr),
NIPQUAD(iph.daddr));
else else
sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u", sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u",
pp->name, pp->name,
NIPQUAD(iph->saddr), NIPQUAD(iph.saddr),
ntohs(h.portp[0]), ntohs(ports[0]),
NIPQUAD(iph->daddr), NIPQUAD(iph.daddr),
ntohs(h.portp[1])); ntohs(ports[1]));
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
} }
......
...@@ -44,8 +44,11 @@ struct isakmp_hdr { ...@@ -44,8 +44,11 @@ struct isakmp_hdr {
static struct ip_vs_conn * static struct ip_vs_conn *
ah_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ah_conn_in_get(const struct sk_buff *skb,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) struct ip_vs_protocol *pp,
const struct iphdr *iph,
unsigned int proto_off,
int inverse)
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
...@@ -81,8 +84,8 @@ ah_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -81,8 +84,8 @@ ah_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp,
static struct ip_vs_conn * static struct ip_vs_conn *
ah_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) const struct iphdr *iph, unsigned int proto_off, int inverse)
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
...@@ -119,8 +122,8 @@ ah_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -119,8 +122,8 @@ ah_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp,
static int static int
ah_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, ah_conn_schedule(struct sk_buff *skb,
struct iphdr *iph, union ip_vs_tphdr h, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp) int *verdict, struct ip_vs_conn **cpp)
{ {
/* /*
...@@ -132,12 +135,18 @@ ah_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -132,12 +135,18 @@ ah_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
static void static void
ah_debug_packet(struct ip_vs_protocol *pp, struct iphdr *iph, char *msg) ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{ {
char buf[256]; char buf[256];
struct iphdr iph;
if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0)
sprintf(buf, "%s TRUNCATED", pp->name);
else
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
pp->name, NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); pp->name, NIPQUAD(iph.saddr),
NIPQUAD(iph.daddr));
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
} }
......
...@@ -44,8 +44,11 @@ struct isakmp_hdr { ...@@ -44,8 +44,11 @@ struct isakmp_hdr {
static struct ip_vs_conn * static struct ip_vs_conn *
esp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp, esp_conn_in_get(const struct sk_buff *skb,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) struct ip_vs_protocol *pp,
const struct iphdr *iph,
unsigned int proto_off,
int inverse)
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
...@@ -81,8 +84,8 @@ esp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -81,8 +84,8 @@ esp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp,
static struct ip_vs_conn * static struct ip_vs_conn *
esp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp, esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) const struct iphdr *iph, unsigned int proto_off, int inverse)
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
...@@ -120,7 +123,6 @@ esp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -120,7 +123,6 @@ esp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp,
static int static int
esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *iph, union ip_vs_tphdr h,
int *verdict, struct ip_vs_conn **cpp) int *verdict, struct ip_vs_conn **cpp)
{ {
/* /*
...@@ -132,12 +134,18 @@ esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -132,12 +134,18 @@ esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
static void static void
esp_debug_packet(struct ip_vs_protocol *pp, struct iphdr *iph, char *msg) esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{ {
char buf[256]; char buf[256];
struct iphdr iph;
if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0)
sprintf(buf, "%s TRUNCATED", pp->name);
else
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
pp->name, NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); pp->name, NIPQUAD(iph.saddr),
NIPQUAD(iph.daddr));
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
} }
......
...@@ -28,8 +28,11 @@ static int icmp_timeouts[1] = { 1*60*HZ }; ...@@ -28,8 +28,11 @@ static int icmp_timeouts[1] = { 1*60*HZ };
static char * icmp_state_name_table[1] = { "ICMP" }; static char * icmp_state_name_table[1] = { "ICMP" };
struct ip_vs_conn * struct ip_vs_conn *
icmp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp, icmp_conn_in_get(const struct sk_buff *skb,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) struct ip_vs_protocol *pp,
const struct iphdr *iph,
unsigned int proto_off,
int inverse)
{ {
#if 0 #if 0
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
...@@ -52,8 +55,11 @@ icmp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -52,8 +55,11 @@ icmp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp,
} }
struct ip_vs_conn * struct ip_vs_conn *
icmp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp, icmp_conn_out_get(const struct sk_buff *skb,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) struct ip_vs_protocol *pp,
const struct iphdr *iph,
unsigned int proto_off,
int inverse)
{ {
#if 0 #if 0
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
...@@ -76,7 +82,6 @@ icmp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -76,7 +82,6 @@ icmp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp,
static int static int
icmp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, icmp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *iph, union ip_vs_tphdr h,
int *verdict, struct ip_vs_conn **cpp) int *verdict, struct ip_vs_conn **cpp)
{ {
*verdict = NF_ACCEPT; *verdict = NF_ACCEPT;
...@@ -84,41 +89,51 @@ icmp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -84,41 +89,51 @@ icmp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
} }
static int static int
icmp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp, icmp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
struct iphdr *iph, union ip_vs_tphdr h, int size)
{ {
if (!(iph->frag_off & __constant_htons(IP_OFFSET))) { if (!(skb->nh.iph->frag_off & __constant_htons(IP_OFFSET))) {
if (ip_compute_csum(h.raw, size)) { if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
IP_VS_DBG_RL_PKT(0, pp, iph, "Failed checksum for"); if (ip_vs_checksum_complete(skb, skb->nh.iph->ihl * 4)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0, "Failed checksum for");
return 0; return 0;
} }
} }
}
return 1; return 1;
} }
static void static void
icmp_debug_packet(struct ip_vs_protocol *pp, struct iphdr *iph, char *msg) icmp_debug_packet(struct ip_vs_protocol *pp,
const struct sk_buff *skb,
int offset,
const char *msg)
{ {
char buf[256]; char buf[256];
union ip_vs_tphdr h; struct iphdr iph;
struct icmphdr icmph;
h.raw = (char *) iph + iph->ihl * 4; if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0)
if (iph->frag_off & __constant_htons(IP_OFFSET)) sprintf(buf, "%s TRUNCATED", pp->name);
else if (iph.frag_off & __constant_htons(IP_OFFSET))
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
pp->name, NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); pp->name, NIPQUAD(iph.saddr),
NIPQUAD(iph.daddr));
else if (skb_copy_bits(skb, offset + iph.ihl*4, &icmph, sizeof(icmph)) < 0)
sprintf(buf, "%s TRUNCATED to %u bytes\n",
pp->name, skb->len - offset);
else else
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d", sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d",
pp->name, NIPQUAD(iph->saddr), NIPQUAD(iph->daddr), pp->name, NIPQUAD(iph.saddr),
h.icmph->type, h.icmph->code); NIPQUAD(iph.daddr),
icmph.type, icmph.code);
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
} }
static int static int
icmp_state_transition(struct ip_vs_conn *cp, icmp_state_transition(struct ip_vs_conn *cp, int direction,
int direction, struct iphdr *iph, const struct sk_buff *skb,
union ip_vs_tphdr h, struct ip_vs_protocol *pp) struct ip_vs_protocol *pp)
{ {
cp->timeout = pp->timeout_table[IP_VS_ICMP_S_NORMAL]; cp->timeout = pp->timeout_table[IP_VS_ICMP_S_NORMAL];
return 1; return 1;
......
...@@ -21,52 +21,68 @@ ...@@ -21,52 +21,68 @@
#include <linux/tcp.h> /* for tcphdr */ #include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h> #include <net/ip.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */ #include <net/tcp.h> /* for csum_tcpudp_magic */
#include <linux/netfilter.h> #include <linux/netfilter_ipv4.h>
#include <net/ip_vs.h> #include <net/ip_vs.h>
static struct ip_vs_conn * static struct ip_vs_conn *
tcp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp, tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) const struct iphdr *iph, unsigned int proto_off, int inverse)
{ {
__u16 ports[2];
if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
return NULL;
if (likely(!inverse)) { if (likely(!inverse)) {
return ip_vs_conn_in_get(iph->protocol, return ip_vs_conn_in_get(iph->protocol,
iph->saddr, h.th->source, iph->saddr, ports[0],
iph->daddr, h.th->dest); iph->daddr, ports[1]);
} else { } else {
return ip_vs_conn_in_get(iph->protocol, return ip_vs_conn_in_get(iph->protocol,
iph->daddr, h.th->dest, iph->daddr, ports[1],
iph->saddr, h.th->source); iph->saddr, ports[0]);
} }
} }
static struct ip_vs_conn * static struct ip_vs_conn *
tcp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp, tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) const struct iphdr *iph, unsigned int proto_off, int inverse)
{ {
__u16 ports[2];
if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
return NULL;
if (likely(!inverse)) { if (likely(!inverse)) {
return ip_vs_conn_out_get(iph->protocol, return ip_vs_conn_out_get(iph->protocol,
iph->saddr, h.th->source, iph->saddr, ports[0],
iph->daddr, h.th->dest); iph->daddr, ports[1]);
} else { } else {
return ip_vs_conn_out_get(iph->protocol, return ip_vs_conn_out_get(iph->protocol,
iph->daddr, h.th->dest, iph->daddr, ports[1],
iph->saddr, h.th->source); iph->saddr, ports[0]);
} }
} }
static int static int
tcp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, tcp_conn_schedule(struct sk_buff *skb,
struct iphdr *iph, union ip_vs_tphdr h, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp) int *verdict, struct ip_vs_conn **cpp)
{ {
struct ip_vs_service *svc; struct ip_vs_service *svc;
struct tcphdr tcph;
if (h.th->syn && if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
(svc = ip_vs_service_get(skb->nfmark, iph->protocol, *verdict = NF_DROP;
iph->daddr, h.portp[1]))) { return 0;
}
if (tcph.syn &&
(svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
skb->nh.iph->daddr, tcph.dest))) {
if (ip_vs_todrop()) { if (ip_vs_todrop()) {
/* /*
* It seems that we are very loaded. * It seems that we are very loaded.
...@@ -81,9 +97,9 @@ tcp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -81,9 +97,9 @@ tcp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the * Let the virtual server select a real server for the
* incoming connection, and create a connection entry. * incoming connection, and create a connection entry.
*/ */
*cpp = ip_vs_schedule(svc, iph); *cpp = ip_vs_schedule(svc, skb);
if (!*cpp) { if (!*cpp) {
*verdict = ip_vs_leave(svc, skb, pp, h); *verdict = ip_vs_leave(svc, skb, pp);
return 0; return 0;
} }
ip_vs_service_put(svc); ip_vs_service_put(svc);
...@@ -93,111 +109,128 @@ tcp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -93,111 +109,128 @@ tcp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
static inline void static inline void
tcp_fast_csum_update(union ip_vs_tphdr *h, u32 oldip, u32 newip, tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip,
u16 oldport, u16 newport) u16 oldport, u16 newport)
{ {
h->th->check = tcph->check =
ip_vs_check_diff(~oldip, newip, ip_vs_check_diff(~oldip, newip,
ip_vs_check_diff(oldport ^ 0xFFFF, ip_vs_check_diff(oldport ^ 0xFFFF,
newport, h->th->check)); newport, tcph->check));
} }
static int static int
tcp_snat_handler(struct sk_buff *skb, tcp_snat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
struct iphdr *iph, union ip_vs_tphdr h, int size)
{ {
int ihl = (char *) h.raw - (char *) iph; struct tcphdr *tcph;
unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
/* We are sure that we work on first fragment */ /* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
return 0;
h.th->source = cp->vport; if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
if (pp->csum_check && !pp->slave && !pp->csum_check(*pskb, pp))
return 0;
/* Call application helper if needed */ /* Call application helper if needed */
if (ip_vs_app_pkt_out(cp, skb) != 0) { if (!ip_vs_app_pkt_out(cp, pskb))
/* skb data has probably changed, update pointers */ return 0;
iph = skb->nh.iph;
h.raw = (char*)iph + ihl;
size = skb->len - ihl;
} }
tcph = (void *)(*pskb)->nh.iph + tcphoff;
tcph->source = cp->vport;
/* Adjust TCP checksums */ /* Adjust TCP checksums */
if (!cp->app) { if (!cp->app) {
/* Only port and addr are changed, do fast csum update */ /* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(&h, cp->daddr, cp->vaddr, tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
cp->dport, cp->vport); cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_HW) if ((*pskb)->ip_summed == CHECKSUM_HW)
skb->ip_summed = CHECKSUM_NONE; (*pskb)->ip_summed = CHECKSUM_NONE;
} else { } else {
/* full checksum calculation */ /* full checksum calculation */
h.th->check = 0; tcph->check = 0;
skb->csum = csum_partial(h.raw, size, 0); (*pskb)->csum = skb_checksum(*pskb, tcphoff,
h.th->check = csum_tcpudp_magic(iph->saddr, iph->daddr, (*pskb)->len - tcphoff, 0);
size, iph->protocol, tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
skb->csum); (*pskb)->len - tcphoff,
cp->protocol,
(*pskb)->csum);
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n", IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
pp->name, h.th->check, pp->name, tcph->check,
(char*)&(h.th->check) - (char*)h.raw); (char*)&(tcph->check) - (char*)tcph);
} }
return 1; return 1;
} }
static int static int
tcp_dnat_handler(struct sk_buff *skb, tcp_dnat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
struct iphdr *iph, union ip_vs_tphdr h, int size)
{ {
int ihl = (char *) h.raw - (char *) iph; struct tcphdr *tcph;
unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
/* We are sure that we work on first fragment */ /* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
return 0;
h.th->dest = cp->dport; if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
if (pp->csum_check && !pp->slave && !pp->csum_check(*pskb, pp))
return 0;
/* /*
* Attempt ip_vs_app call. * Attempt ip_vs_app call.
* It will fix ip_vs_conn and iph ack_seq stuff * It will fix ip_vs_conn and iph ack_seq stuff
*/ */
if (ip_vs_app_pkt_in(cp, skb) != 0) { if (!ip_vs_app_pkt_in(cp, pskb))
/* skb data has probably changed, update pointers */ return 0;
iph = skb->nh.iph;
h.raw = (char*) iph + ihl;
size = skb->len - ihl;
} }
tcph = (void *)(*pskb)->nh.iph + tcphoff;
tcph->dest = cp->dport;
/* /*
* Adjust TCP/UDP checksums * Adjust TCP checksums
*/ */
if (!cp->app) { if (!cp->app) {
/* Only port and addr are changed, do fast csum update */ /* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(&h, cp->vaddr, cp->daddr, tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
cp->vport, cp->dport); cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_HW) if ((*pskb)->ip_summed == CHECKSUM_HW)
skb->ip_summed = CHECKSUM_NONE; (*pskb)->ip_summed = CHECKSUM_NONE;
} else { } else {
/* full checksum calculation */ /* full checksum calculation */
h.th->check = 0; tcph->check = 0;
h.th->check = csum_tcpudp_magic(iph->saddr, iph->daddr, (*pskb)->csum = skb_checksum(*pskb, tcphoff,
size, iph->protocol, (*pskb)->len - tcphoff, 0);
csum_partial(h.raw, size, 0)); tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
skb->ip_summed = CHECKSUM_UNNECESSARY; (*pskb)->len - tcphoff,
cp->protocol,
(*pskb)->csum);
(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
} }
return 1; return 1;
} }
static int static int
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp, tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
struct iphdr *iph, union ip_vs_tphdr h, int size)
{ {
unsigned int tcphoff = skb->nh.iph->ihl*4;
switch (skb->ip_summed) { switch (skb->ip_summed) {
case CHECKSUM_NONE: case CHECKSUM_NONE:
skb->csum = csum_partial(h.raw, size, 0); skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
case CHECKSUM_HW: case CHECKSUM_HW:
if (csum_tcpudp_magic(iph->saddr, iph->daddr, size, if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
iph->protocol, skb->csum)) { skb->len - tcphoff,
IP_VS_DBG_RL_PKT(0, pp, iph, skb->nh.iph->protocol, skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for"); "Failed checksum for");
return 0; return 0;
} }
...@@ -383,10 +416,9 @@ static inline int tcp_state_idx(struct tcphdr *th) ...@@ -383,10 +416,9 @@ static inline int tcp_state_idx(struct tcphdr *th)
static inline void static inline void
set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
int direction, union ip_vs_tphdr h) int direction, struct tcphdr *th)
{ {
int state_idx; int state_idx;
struct tcphdr *th = h.th;
int new_state = IP_VS_TCP_S_CLOSE; int new_state = IP_VS_TCP_S_CLOSE;
int state_off = tcp_state_off[direction]; int state_off = tcp_state_off[direction];
...@@ -448,12 +480,17 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, ...@@ -448,12 +480,17 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
* Handle state transitions * Handle state transitions
*/ */
static int static int
tcp_state_transition(struct ip_vs_conn *cp, tcp_state_transition(struct ip_vs_conn *cp, int direction,
int direction, struct iphdr *iph, const struct sk_buff *skb,
union ip_vs_tphdr h, struct ip_vs_protocol *pp) struct ip_vs_protocol *pp)
{ {
struct tcphdr tcph;
if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0)
return 0;
spin_lock(&cp->lock); spin_lock(&cp->lock);
set_tcp_state(pp, cp, direction, h); set_tcp_state(pp, cp, direction, &tcph);
spin_unlock(&cp->lock); spin_unlock(&cp->lock);
return 1; return 1;
...@@ -574,9 +611,6 @@ static void tcp_exit(struct ip_vs_protocol *pp) ...@@ -574,9 +611,6 @@ static void tcp_exit(struct ip_vs_protocol *pp)
} }
extern void
tcpudp_debug_packet(struct ip_vs_protocol *pp, struct iphdr *iph, char *msg);
struct ip_vs_protocol ip_vs_protocol_tcp = { struct ip_vs_protocol ip_vs_protocol_tcp = {
.name = "TCP", .name = "TCP",
.protocol = IPPROTO_TCP, .protocol = IPPROTO_TCP,
...@@ -599,7 +633,7 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { ...@@ -599,7 +633,7 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
.state_name = tcp_state_name, .state_name = tcp_state_name,
.state_transition = tcp_state_transition, .state_transition = tcp_state_transition,
.app_conn_bind = tcp_app_conn_bind, .app_conn_bind = tcp_app_conn_bind,
.debug_packet = tcpudp_debug_packet, .debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = tcp_timeout_change, .timeout_change = tcp_timeout_change,
.set_state_timeout = tcp_set_state_timeout, .set_state_timeout = tcp_set_state_timeout,
}; };
...@@ -16,25 +16,29 @@ ...@@ -16,25 +16,29 @@
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/netfilter.h> #include <linux/netfilter_ipv4.h>
#include <net/ip_vs.h> #include <net/ip_vs.h>
static struct ip_vs_conn * static struct ip_vs_conn *
udp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp, udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) const struct iphdr *iph, unsigned int proto_off, int inverse)
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
__u16 ports[2];
if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
return NULL;
if (likely(!inverse)) { if (likely(!inverse)) {
cp = ip_vs_conn_in_get(iph->protocol, cp = ip_vs_conn_in_get(iph->protocol,
iph->saddr, h.portp[0], iph->saddr, ports[0],
iph->daddr, h.portp[1]); iph->daddr, ports[1]);
} else { } else {
cp = ip_vs_conn_in_get(iph->protocol, cp = ip_vs_conn_in_get(iph->protocol,
iph->daddr, h.portp[1], iph->daddr, ports[1],
iph->saddr, h.portp[0]); iph->saddr, ports[0]);
} }
return cp; return cp;
...@@ -42,19 +46,23 @@ udp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -42,19 +46,23 @@ udp_conn_in_get(struct sk_buff *skb, struct ip_vs_protocol *pp,
static struct ip_vs_conn * static struct ip_vs_conn *
udp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp, udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *iph, union ip_vs_tphdr h, int inverse) const struct iphdr *iph, unsigned int proto_off, int inverse)
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
__u16 ports[2];
if (skb_copy_bits(skb, skb->nh.iph->ihl*4, ports, sizeof(ports)) < 0)
return NULL;
if (likely(!inverse)) { if (likely(!inverse)) {
cp = ip_vs_conn_out_get(iph->protocol, cp = ip_vs_conn_out_get(iph->protocol,
iph->saddr, h.portp[0], iph->saddr, ports[0],
iph->daddr, h.portp[1]); iph->daddr, ports[1]);
} else { } else {
cp = ip_vs_conn_out_get(iph->protocol, cp = ip_vs_conn_out_get(iph->protocol,
iph->daddr, h.portp[1], iph->daddr, ports[1],
iph->saddr, h.portp[0]); iph->saddr, ports[0]);
} }
return cp; return cp;
...@@ -63,13 +71,18 @@ udp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -63,13 +71,18 @@ udp_conn_out_get(struct sk_buff *skb, struct ip_vs_protocol *pp,
static int static int
udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *iph, union ip_vs_tphdr h,
int *verdict, struct ip_vs_conn **cpp) int *verdict, struct ip_vs_conn **cpp)
{ {
struct ip_vs_service *svc; struct ip_vs_service *svc;
struct udphdr udph;
if ((svc = ip_vs_service_get(skb->nfmark, iph->protocol, if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) {
iph->daddr, h.portp[1]))) { *verdict = NF_DROP;
return 0;
}
if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
skb->nh.iph->daddr, udph.dest))) {
if (ip_vs_todrop()) { if (ip_vs_todrop()) {
/* /*
* It seems that we are very loaded. * It seems that we are very loaded.
...@@ -84,9 +97,9 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -84,9 +97,9 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the * Let the virtual server select a real server for the
* incoming connection, and create a connection entry. * incoming connection, and create a connection entry.
*/ */
*cpp = ip_vs_schedule(svc, iph); *cpp = ip_vs_schedule(svc, skb);
if (!*cpp) { if (!*cpp) {
*verdict = ip_vs_leave(svc, skb, pp, h); *verdict = ip_vs_leave(svc, skb, pp);
return 0; return 0;
} }
ip_vs_service_put(svc); ip_vs_service_put(svc);
...@@ -96,121 +109,145 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -96,121 +109,145 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
static inline void static inline void
udp_fast_csum_update(union ip_vs_tphdr *h, u32 oldip, u32 newip, udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
u16 oldport, u16 newport) u16 oldport, u16 newport)
{ {
h->uh->check = uhdr->check =
ip_vs_check_diff(~oldip, newip, ip_vs_check_diff(~oldip, newip,
ip_vs_check_diff(oldport ^ 0xFFFF, ip_vs_check_diff(oldport ^ 0xFFFF,
newport, h->uh->check)); newport, uhdr->check));
if (!h->uh->check) if (!uhdr->check)
h->uh->check = 0xFFFF; uhdr->check = 0xFFFF;
} }
static int static int
udp_snat_handler(struct sk_buff *skb, udp_snat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
struct iphdr *iph, union ip_vs_tphdr h, int size)
{ {
int ihl = (char *) h.raw - (char *) iph; struct udphdr *udph;
unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
/* We are sure that we work on first fragment */ /* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
return 0;
h.portp[0] = cp->vport; if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
if (pp->csum_check && !pp->slave && !pp->csum_check(*pskb, pp))
return 0;
/* /*
* Call application helper if needed * Call application helper if needed
*/ */
if (ip_vs_app_pkt_out(cp, skb) != 0) { if (!ip_vs_app_pkt_out(cp, pskb))
/* skb data has probably changed, update pointers */ return 0;
iph = skb->nh.iph;
h.raw = (char*)iph + ihl;
size = skb->len - ihl;
} }
udph = (void *)(*pskb)->nh.iph + udphoff;
udph->source = cp->vport;
/* /*
* Adjust UDP checksums * Adjust UDP checksums
*/ */
if (!cp->app && (h.uh->check != 0)) { if (!cp->app && (udph->check != 0)) {
/* Only port and addr are changed, do fast csum update */ /* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(&h, cp->daddr, cp->vaddr, udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
cp->dport, cp->vport); cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_HW) if ((*pskb)->ip_summed == CHECKSUM_HW)
skb->ip_summed = CHECKSUM_NONE; (*pskb)->ip_summed = CHECKSUM_NONE;
} else { } else {
/* full checksum calculation */ /* full checksum calculation */
h.uh->check = 0; udph->check = 0;
skb->csum = csum_partial(h.raw, size, 0); (*pskb)->csum = skb_checksum(*pskb, udphoff,
h.uh->check = csum_tcpudp_magic(iph->saddr, iph->daddr, (*pskb)->len - udphoff, 0);
size, iph->protocol, udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
skb->csum); (*pskb)->len - udphoff,
if (h.uh->check == 0) cp->protocol,
h.uh->check = 0xFFFF; (*pskb)->csum);
if (udph->check == 0)
udph->check = 0xFFFF;
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n", IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
pp->name, h.uh->check, pp->name, udph->check,
(char*)&(h.uh->check) - (char*)h.raw); (char*)&(udph->check) - (char*)udph);
} }
return 1; return 1;
} }
static int static int
udp_dnat_handler(struct sk_buff *skb, udp_dnat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
struct iphdr *iph, union ip_vs_tphdr h, int size)
{ {
int ihl = (char *) h.raw - (char *) iph; struct udphdr *udph;
unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
/* We are sure that we work on first fragment */ /* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
return 0;
h.portp[1] = cp->dport; if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
if (pp->csum_check && !pp->slave && !pp->csum_check(*pskb, pp))
return 0;
/* /*
* Attempt ip_vs_app call. * Attempt ip_vs_app call.
* will fix ip_vs_conn and iph ack_seq stuff * It will fix ip_vs_conn
*/ */
if (ip_vs_app_pkt_in(cp, skb) != 0) { if (!ip_vs_app_pkt_in(cp, pskb))
/* skb data has probably changed, update pointers */ return 0;
iph = skb->nh.iph;
h.raw = (char*) iph + ihl;
size = skb->len - ihl;
} }
udph = (void *)(*pskb)->nh.iph + udphoff;
udph->dest = cp->dport;
/* /*
* Adjust UDP checksums * Adjust UDP checksums
*/ */
if (!cp->app && (h.uh->check != 0)) { if (!cp->app && (udph->check != 0)) {
/* Only port and addr are changed, do fast csum update */ /* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(&h, cp->vaddr, cp->daddr, udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
cp->vport, cp->dport); cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_HW) if ((*pskb)->ip_summed == CHECKSUM_HW)
skb->ip_summed = CHECKSUM_NONE; (*pskb)->ip_summed = CHECKSUM_NONE;
} else { } else {
/* full checksum calculation */ /* full checksum calculation */
h.uh->check = 0; udph->check = 0;
h.uh->check = csum_tcpudp_magic(iph->saddr, iph->daddr, (*pskb)->csum = skb_checksum(*pskb, udphoff,
size, iph->protocol, (*pskb)->len - udphoff, 0);
csum_partial(h.raw, size, 0)); udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
if (h.uh->check == 0) (*pskb)->len - udphoff,
h.uh->check = 0xFFFF; cp->protocol,
skb->ip_summed = CHECKSUM_UNNECESSARY; (*pskb)->csum);
if (udph->check == 0)
udph->check = 0xFFFF;
(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
} }
return 1; return 1;
} }
static int static int
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp, udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
struct iphdr *iph, union ip_vs_tphdr h, int size)
{ {
if (h.uh->check != 0) { struct udphdr udph;
unsigned int udphoff = skb->nh.iph->ihl*4;
if (skb_copy_bits(skb, udphoff, &udph, sizeof(udph)) < 0)
return 0;
if (udph.check != 0) {
switch (skb->ip_summed) { switch (skb->ip_summed) {
case CHECKSUM_NONE: case CHECKSUM_NONE:
skb->csum = csum_partial(h.raw, size, 0); skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
case CHECKSUM_HW: case CHECKSUM_HW:
if (csum_tcpudp_magic(iph->saddr, iph->daddr, size, if (csum_tcpudp_magic(skb->nh.iph->saddr,
iph->protocol, skb->csum)) { skb->nh.iph->daddr,
IP_VS_DBG_RL_PKT(0, pp, iph, skb->len - udphoff,
skb->nh.iph->protocol,
skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for"); "Failed checksum for");
return 0; return 0;
} }
...@@ -342,9 +379,9 @@ static const char * udp_state_name(int state) ...@@ -342,9 +379,9 @@ static const char * udp_state_name(int state)
} }
static int static int
udp_state_transition(struct ip_vs_conn *cp, udp_state_transition(struct ip_vs_conn *cp, int direction,
int direction, struct iphdr *iph, const struct sk_buff *skb,
union ip_vs_tphdr h, struct ip_vs_protocol *pp) struct ip_vs_protocol *pp)
{ {
cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
return 1; return 1;
...@@ -361,9 +398,6 @@ static void udp_exit(struct ip_vs_protocol *pp) ...@@ -361,9 +398,6 @@ static void udp_exit(struct ip_vs_protocol *pp)
} }
extern void
tcpudp_debug_packet(struct ip_vs_protocol *pp, struct iphdr *iph, char *msg);
struct ip_vs_protocol ip_vs_protocol_udp = { struct ip_vs_protocol ip_vs_protocol_udp = {
.name = "UDP", .name = "UDP",
.protocol = IPPROTO_UDP, .protocol = IPPROTO_UDP,
...@@ -385,7 +419,7 @@ struct ip_vs_protocol ip_vs_protocol_udp = { ...@@ -385,7 +419,7 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
.register_app = udp_register_app, .register_app = udp_register_app,
.unregister_app = udp_unregister_app, .unregister_app = udp_unregister_app,
.app_conn_bind = udp_app_conn_bind, .app_conn_bind = udp_app_conn_bind,
.debug_packet = tcpudp_debug_packet, .debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL, .timeout_change = NULL,
.set_state_timeout = udp_set_state_timeout, .set_state_timeout = udp_set_state_timeout,
}; };
...@@ -55,7 +55,7 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc) ...@@ -55,7 +55,7 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
* Round-Robin Scheduling * Round-Robin Scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_rr_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct list_head *p, *q; struct list_head *p, *q;
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
......
...@@ -83,7 +83,7 @@ ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) ...@@ -83,7 +83,7 @@ ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
* Weighted Least Connection scheduling * Weighted Least Connection scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_sed_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest, *least; struct ip_vs_dest *dest, *least;
unsigned int loh, doh; unsigned int loh, doh;
......
...@@ -199,10 +199,11 @@ static inline int is_overloaded(struct ip_vs_dest *dest) ...@@ -199,10 +199,11 @@ static inline int is_overloaded(struct ip_vs_dest *dest)
* Source Hashing scheduling * Source Hashing scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_sh_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
struct ip_vs_sh_bucket *tbl; struct ip_vs_sh_bucket *tbl;
struct iphdr *iph = skb->nh.iph;
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
......
...@@ -71,7 +71,7 @@ ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) ...@@ -71,7 +71,7 @@ ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
* Weighted Least Connection scheduling * Weighted Least Connection scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest, *least; struct ip_vs_dest *dest, *least;
unsigned int loh, doh; unsigned int loh, doh;
......
...@@ -138,7 +138,7 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc) ...@@ -138,7 +138,7 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
* Weighted Round-Robin Scheduling * Weighted Round-Robin Scheduling
*/ */
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_wrr_schedule(struct ip_vs_service *svc, struct iphdr *iph) ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
struct ip_vs_wrr_mark *mark = svc->sched_data; struct ip_vs_wrr_mark *mark = svc->sched_data;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment