Commit bc29b7ac, authored by Gerald Schaefer, committed by Martin Schwidefsky

s390/mm: clean up pte/pmd encoding

The hugetlbfs pte<->pmd conversion functions currently assume that the pmd
bit layout is consistent with the pte layout, which is not really true.

The SW read and write bits are encoded as the sequence "wr" in a pte, but
in a pmd it is "rw". The hugetlbfs conversion assumes that the sequence
is identical in both cases, which results in swapped read and write bits
in the pmd. In practice this is not a problem, because those pmd bits are
only relevant for THP pmds and not for hugetlbfs pmds. The hugetlbfs code
works on (fake) ptes, and the converted pte bits are correct.

There is another variation in pte/pmd encoding which affects dirty
prot-none ptes/pmds. In this case, a pmd has both its HW read-only and
invalid bit set, while it is only the invalid bit for a pte. This also has
no effect in practice, but the two encodings should still be consistent.

This patch fixes both inconsistencies by changing the SW read/write bit
layout for pmds as well as the PAGE_NONE encoding for ptes. It also makes
the hugetlbfs conversion functions more robust by introducing a
move_set_bit() macro that uses the pte/pmd bit #defines instead of
constant shifts.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent bad60e6f
...@@ -242,8 +242,8 @@ static inline int is_module_addr(void *addr) ...@@ -242,8 +242,8 @@ static inline int is_module_addr(void *addr)
* swap .11..ttttt.0 * swap .11..ttttt.0
* prot-none, clean, old .11.xx0000.1 * prot-none, clean, old .11.xx0000.1
* prot-none, clean, young .11.xx0001.1 * prot-none, clean, young .11.xx0001.1
* prot-none, dirty, old .10.xx0010.1 * prot-none, dirty, old .11.xx0010.1
* prot-none, dirty, young .10.xx0011.1 * prot-none, dirty, young .11.xx0011.1
* read-only, clean, old .11.xx0100.1 * read-only, clean, old .11.xx0100.1
* read-only, clean, young .01.xx0101.1 * read-only, clean, young .01.xx0101.1
* read-only, dirty, old .11.xx0110.1 * read-only, dirty, old .11.xx0110.1
...@@ -323,8 +323,8 @@ static inline int is_module_addr(void *addr) ...@@ -323,8 +323,8 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ #define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */
#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */
#define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ #define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */
#define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ #define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */
#ifdef CONFIG_MEM_SOFT_DIRTY #ifdef CONFIG_MEM_SOFT_DIRTY
#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */ #define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
...@@ -335,15 +335,15 @@ static inline int is_module_addr(void *addr) ...@@ -335,15 +335,15 @@ static inline int is_module_addr(void *addr)
/* /*
* Segment table and region3 table entry encoding * Segment table and region3 table entry encoding
* (R = read-only, I = invalid, y = young bit): * (R = read-only, I = invalid, y = young bit):
* dy..R...I...rw * dy..R...I...wr
* prot-none, clean, old 00..1...1...00 * prot-none, clean, old 00..1...1...00
* prot-none, clean, young 01..1...1...00 * prot-none, clean, young 01..1...1...00
* prot-none, dirty, old 10..1...1...00 * prot-none, dirty, old 10..1...1...00
* prot-none, dirty, young 11..1...1...00 * prot-none, dirty, young 11..1...1...00
* read-only, clean, old 00..1...1...10 * read-only, clean, old 00..1...1...01
* read-only, clean, young 01..1...0...10 * read-only, clean, young 01..1...0...01
* read-only, dirty, old 10..1...1...10 * read-only, dirty, old 10..1...1...01
* read-only, dirty, young 11..1...0...10 * read-only, dirty, young 11..1...0...01
* read-write, clean, old 00..1...1...11 * read-write, clean, old 00..1...1...11
* read-write, clean, young 01..1...0...11 * read-write, clean, young 01..1...0...11
* read-write, dirty, old 10..0...1...11 * read-write, dirty, old 10..0...1...11
...@@ -382,7 +382,7 @@ static inline int is_module_addr(void *addr) ...@@ -382,7 +382,7 @@ static inline int is_module_addr(void *addr)
/* /*
* Page protection definitions. * Page protection definitions.
*/ */
#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \ #define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \
_PAGE_INVALID | _PAGE_PROTECT) _PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ #define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
......
...@@ -11,6 +11,12 @@ ...@@ -11,6 +11,12 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
/*
 * Relocate a single flag bit: "a" and "b" are single-bit bitmasks. When
 * the bit selected by "a" is set in "x", the result has exactly the bit
 * selected by "b" set; when it is clear, the result is 0. The value is
 * first shifted down to bit 0 and then up to the target position, so the
 * macro works regardless of which of the two masks is the larger one.
 */
#define move_set_bit(x, a, b)	((((x) & (a)) >> ilog2(a)) << ilog2(b))
static inline unsigned long __pte_to_rste(pte_t pte) static inline unsigned long __pte_to_rste(pte_t pte)
{ {
unsigned long rste; unsigned long rste;
...@@ -37,13 +43,22 @@ static inline unsigned long __pte_to_rste(pte_t pte) ...@@ -37,13 +43,22 @@ static inline unsigned long __pte_to_rste(pte_t pte)
*/ */
if (pte_present(pte)) { if (pte_present(pte)) {
rste = pte_val(pte) & PAGE_MASK; rste = pte_val(pte) & PAGE_MASK;
rste |= (pte_val(pte) & _PAGE_READ) >> 4; rste |= move_set_bit(pte_val(pte), _PAGE_READ,
rste |= (pte_val(pte) & _PAGE_WRITE) >> 4; _SEGMENT_ENTRY_READ);
rste |= (pte_val(pte) & _PAGE_INVALID) >> 5; rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
rste |= (pte_val(pte) & _PAGE_PROTECT); _SEGMENT_ENTRY_WRITE);
rste |= (pte_val(pte) & _PAGE_DIRTY) << 10; rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
rste |= (pte_val(pte) & _PAGE_YOUNG) << 10; _SEGMENT_ENTRY_INVALID);
rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
_SEGMENT_ENTRY_PROTECT);
rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
_SEGMENT_ENTRY_DIRTY);
rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
_SEGMENT_ENTRY_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
_SEGMENT_ENTRY_SOFT_DIRTY);
#endif
} else } else
rste = _SEGMENT_ENTRY_INVALID; rste = _SEGMENT_ENTRY_INVALID;
return rste; return rste;
...@@ -82,13 +97,22 @@ static inline pte_t __rste_to_pte(unsigned long rste) ...@@ -82,13 +97,22 @@ static inline pte_t __rste_to_pte(unsigned long rste)
if (present) { if (present) {
pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4; pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ,
pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4; _PAGE_READ);
pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5; pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE,
pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT); _PAGE_WRITE);
pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10; pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID,
pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10; _PAGE_INVALID);
pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT,
_PAGE_PROTECT);
pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY,
_PAGE_DIRTY);
pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG,
_PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
_PAGE_DIRTY);
#endif
} else } else
pte_val(pte) = _PAGE_INVALID; pte_val(pte) = _PAGE_INVALID;
return pte; return pte;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment