Commit 4a25c3d6 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] Fix swp_entry_t encoding

From: Martin Schwidefsky <schwidefsky@de.ibm.com>

This fixes a problem in sys_swapon that can cause the creation of invalid
swap ptes.  The cause is a mismatch between the arch-independent swap entries
and the pte-encoded swap entries.  The swp_entry_t uses 27 bits for the offset
and 5 bits for the type.  In sys_swapon this definition is used to find how
many swap devices and how many pages on each device there can be.  But the
swap entries encoded in a pte can be subject to additional restrictions due
to the hardware besides the 27/5 division of the bits in the swp_entry_t
type.  This is solved by adding pte_to_swp_entry and swp_entry_to_pte calls
to the calculations for maximum type and offset.

In addition the s390 swap pte division for offset/type is changed from 19/6
bits to 20/5 bits.
parent a8760f9c
......@@ -719,14 +719,14 @@ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
* information in the lowcore.
* Bit 21 and bit 22 are the page invalid bit and the page protection
* bit. We set both to indicate a swapped page.
* Bit 31 is used as the software page present bit. If a page is
* swapped this obviously has to be zero.
* This leaves the bits 1-19 and bits 24-30 to store type and offset.
* We use the 7 bits from 24-30 for the type and the 19 bits from 1-19
* for the offset.
* 0| offset |0110|type |0
* 00000000001111111111222222222233
* 01234567890123456789012345678901
* Bit 30 and 31 are used to distinguish the different page types. For
* a swapped page these bits need to be zero.
* This leaves the bits 1-19 and bits 24-29 to store type and offset.
* We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
* plus 24 for the offset.
* 0| offset |0110|o|type |00|
* 0 0000000001111111111 2222 2 22222 33
* 0 1234567890123456789 0123 4 56789 01
*
* 64 bit swap entry format:
* A page-table entry has some bits we have to treat in a special way.
......@@ -736,29 +736,25 @@ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
* information in the lowcore.
* Bit 53 and bit 54 are the page invalid bit and the page protection
* bit. We set both to indicate a swapped page.
* Bit 63 is used as the software page present bit. If a page is
* swapped this obviously has to be zero.
* This leaves the bits 0-51 and bits 56-62 to store type and offset.
* We use the 7 bits from 56-62 for the type and the 52 bits from 0-51
* for the offset.
* | offset |0110|type |0
* 0000000000111111111122222222223333333333444444444455555555556666
* 0123456789012345678901234567890123456789012345678901234567890123
* Bit 62 and 63 are used to distinguish the different page types. For
* a swapped page these bits need to be zero.
* This leaves the bits 0-51 and bits 56-61 to store type and offset.
* We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
* plus 56 for the offset.
* | offset |0110|o|type |00|
* 0000000000111111111122222222223333333333444444444455 5555 5 55566 66
* 0123456789012345678901234567890123456789012345678901 2345 6 78901 23
*/
extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{
pte_t pte;
pte_val(pte) = (type << 1) | (offset << 12) | _PAGE_INVALID_SWAP;
#ifndef __s390x__
BUG_ON((pte_val(pte) & 0x80000901) != 0);
#else /* __s390x__ */
BUG_ON((pte_val(pte) & 0x901) != 0);
#endif /* __s390x__ */
pte_val(pte) = _PAGE_INVALID_SWAP | ((type & 0x1f) << 2) |
((offset & 1) << 7) | ((offset & 0xffffe) << 11);
return pte;
}
#define __swp_type(entry) (((entry).val >> 1) & 0x3f)
#define __swp_offset(entry) ((entry).val >> 12)
#define __swp_type(entry) (((entry).val >> 2) & 0x1f)
#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1))
#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
......
......@@ -15,11 +15,11 @@
/*
* Store a type+offset into a swp_entry_t in an arch-independent format
*/
static inline swp_entry_t swp_entry(unsigned type, pgoff_t offset)
static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
{
swp_entry_t ret;
ret.val = ((unsigned long)type << SWP_TYPE_SHIFT(ret)) |
ret.val = (type << SWP_TYPE_SHIFT(ret)) |
(offset & SWP_OFFSET_MASK(ret));
return ret;
}
......
......@@ -1242,7 +1242,19 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
if (!(p->flags & SWP_USED))
break;
error = -EPERM;
if (type >= MAX_SWAPFILES) {
/*
* Test if adding another swap device is possible. There are
* two limiting factors: 1) the number of bits for the swap
* type in the swp_entry_t definition and 2) the number of bits for
* the swap type in the swap ptes as defined by the different
* architectures. To honor both limitations a swap entry
* with swap offset 0 and swap type ~0UL is created, encoded
* to a swap pte, decoded to a swp_entry_t again and finally
* the swap type part is extracted. This will mask all bits
* from the initial ~0UL that can't be encoded in either the
* swp_entry_t or the architecture definition of a swap pte.
*/
if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) {
swap_list_unlock();
goto out;
}
......@@ -1364,7 +1376,21 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
}
p->lowest_bit = 1;
maxpages = swp_offset(swp_entry(0,~0UL)) - 1;
/*
* Find out how many pages are allowed for a single swap
* device. There are two limiting factors: 1) the number of
* bits for the swap offset in the swp_entry_t type and
* 2) the number of bits in a swap pte as defined by
* the different architectures. In order to find the
* largest possible bit mask a swap entry with swap type 0
* and swap offset ~0UL is created, encoded to a swap pte,
* decoded to a swp_entry_t again and finally the swap
* offset is extracted. This will mask all the bits from
* the initial ~0UL mask that can't be encoded in either
* the swp_entry_t or the architecture definition of a
* swap pte.
*/
maxpages = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0,~0UL)))) - 1;
if (maxpages > swap_header->info.last_page)
maxpages = swap_header->info.last_page;
p->highest_bit = maxpages - 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment