Commit 4a25c3d6 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] Fix swp_entry_t encoding

From: Martin Schwidefsky <schwidefsky@de.ibm.com>

This fixes a problem in sys_swapon that can cause the creation of invalid
swap ptes.  The cause is a mismatch between the arch-independent swap entries
and the pte-encoded swap entries.  The swp_entry_t uses 27 bits for the offset
and 5 bits for the type.  In sys_swapon this definition is used to find how
many swap devices and how many pages on each device there can be.  But the
swap entries encoded in a pte can be subject to additional restrictions due
to the hardware besides the 27/5 division of the bits in the swp_entry_t
type.  This is solved by adding pte_to_swp_entry and swp_entry_to_pte calls
to the calculations for maximum type and offset.

In addition the s390 swap pte division for offset/type is changed from 19/6
bits to 20/5 bits.
parent a8760f9c
...@@ -719,14 +719,14 @@ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) ...@@ -719,14 +719,14 @@ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
* information in the lowcore. * information in the lowcore.
* Bit 21 and bit 22 are the page invalid bit and the page protection * Bit 21 and bit 22 are the page invalid bit and the page protection
* bit. We set both to indicate a swapped page. * bit. We set both to indicate a swapped page.
* Bit 31 is used as the software page present bit. If a page is * Bit 30 and 31 are used to distinguish the different page types. For
* swapped this obviously has to be zero. * a swapped page these bits need to be zero.
* This leaves the bits 1-19 and bits 24-30 to store type and offset. * This leaves the bits 1-19 and bits 24-29 to store type and offset.
* We use the 7 bits from 24-30 for the type and the 19 bits from 1-19 * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
* for the offset. * plus 24 for the offset.
* 0| offset |0110|type |0 * 0| offset |0110|o|type |00|
* 00000000001111111111222222222233 * 0 0000000001111111111 2222 2 22222 33
* 01234567890123456789012345678901 * 0 1234567890123456789 0123 4 56789 01
* *
* 64 bit swap entry format: * 64 bit swap entry format:
* A page-table entry has some bits we have to treat in a special way. * A page-table entry has some bits we have to treat in a special way.
...@@ -736,29 +736,25 @@ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) ...@@ -736,29 +736,25 @@ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
* information in the lowcore. * information in the lowcore.
* Bit 53 and bit 54 are the page invalid bit and the page protection * Bit 53 and bit 54 are the page invalid bit and the page protection
* bit. We set both to indicate a swapped page. * bit. We set both to indicate a swapped page.
* Bit 63 is used as the software page present bit. If a page is * Bit 62 and 63 are used to distinguish the different page types. For
* swapped this obviously has to be zero. * a swapped page these bits need to be zero.
* This leaves the bits 0-51 and bits 56-62 to store type and offset. * This leaves the bits 0-51 and bits 56-61 to store type and offset.
* We use the 7 bits from 56-62 for the type and the 52 bits from 0-51 * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
* for the offset. * plus 56 for the offset.
* | offset |0110|type |0 * | offset |0110|o|type |00|
* 0000000000111111111122222222223333333333444444444455555555556666 * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66
* 0123456789012345678901234567890123456789012345678901234567890123 * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23
*/ */
extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{ {
pte_t pte; pte_t pte;
pte_val(pte) = (type << 1) | (offset << 12) | _PAGE_INVALID_SWAP; pte_val(pte) = _PAGE_INVALID_SWAP | ((type & 0x1f) << 2) |
#ifndef __s390x__ ((offset & 1) << 7) | ((offset & 0xffffe) << 11);
BUG_ON((pte_val(pte) & 0x80000901) != 0);
#else /* __s390x__ */
BUG_ON((pte_val(pte) & 0x901) != 0);
#endif /* __s390x__ */
return pte; return pte;
} }
#define __swp_type(entry) (((entry).val >> 1) & 0x3f) #define __swp_type(entry) (((entry).val >> 2) & 0x1f)
#define __swp_offset(entry) ((entry).val >> 12) #define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1))
#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) #define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
......
...@@ -15,11 +15,11 @@ ...@@ -15,11 +15,11 @@
/* /*
* Store a type+offset into a swp_entry_t in an arch-independent format * Store a type+offset into a swp_entry_t in an arch-independent format
*/ */
static inline swp_entry_t swp_entry(unsigned type, pgoff_t offset) static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
{ {
swp_entry_t ret; swp_entry_t ret;
ret.val = ((unsigned long)type << SWP_TYPE_SHIFT(ret)) | ret.val = (type << SWP_TYPE_SHIFT(ret)) |
(offset & SWP_OFFSET_MASK(ret)); (offset & SWP_OFFSET_MASK(ret));
return ret; return ret;
} }
......
...@@ -1242,7 +1242,19 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) ...@@ -1242,7 +1242,19 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
if (!(p->flags & SWP_USED)) if (!(p->flags & SWP_USED))
break; break;
error = -EPERM; error = -EPERM;
if (type >= MAX_SWAPFILES) { /*
* Test if adding another swap device is possible. There are
* two limiting factors: 1) the number of bits for the swap
* type in the swp_entry_t definition and 2) the number of bits for
* the swap type in the swap ptes as defined by the different
* architectures. To honor both limitations a swap entry
* with swap offset 0 and swap type ~0UL is created, encoded
* to a swap pte, decoded to a swp_entry_t again and finally
* the swap type part is extracted. This will mask all bits
* from the initial ~0UL that can't be encoded in either the
* swp_entry_t or the architecture definition of a swap pte.
*/
if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) {
swap_list_unlock(); swap_list_unlock();
goto out; goto out;
} }
...@@ -1364,7 +1376,21 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) ...@@ -1364,7 +1376,21 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
} }
p->lowest_bit = 1; p->lowest_bit = 1;
maxpages = swp_offset(swp_entry(0,~0UL)) - 1; /*
* Find out how many pages are allowed for a single swap
* device. There are two limiting factors: 1) the number of
* bits for the swap offset in the swp_entry_t type and
* 2) the number of bits in a swap pte as defined by
* the different architectures. In order to find the
* largest possible bit mask a swap entry with swap type 0
* and swap offset ~0UL is created, encoded to a swap pte,
* decoded to a swp_entry_t again and finally the swap
* offset is extracted. This will mask all the bits from
* the initial ~0UL mask that can't be encoded in either
* the swp_entry_t or the architecture definition of a
* swap pte.
*/
maxpages = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0,~0UL)))) - 1;
if (maxpages > swap_header->info.last_page) if (maxpages > swap_header->info.last_page)
maxpages = swap_header->info.last_page; maxpages = swap_header->info.last_page;
p->highest_bit = maxpages - 1; p->highest_bit = maxpages - 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment