Commit 545a604c authored by Christoph Lameter, committed by Linus Torvalds

[PATCH] Move accounting function calls out of critical vm code paths

In the 2.6.11 development cycle function calls have been added to lots
of hot vm paths to do accounting. I think these should not go into the
final 2.6.11 release because these statistics can be collected in a different
way that does not require the updating of counters from frequently used
vm code paths and is consistent with the methods used elsewhere in the kernel
to obtain statistics.

These function calls are

acct_update_integrals	-> Account for processes based on stime changes
update_mem_hiwater	-> takes rss and total_vm hiwater marks.

acct_update_integrals is only useful to call if stime changes; otherwise
it will simply return. It is therefore best to relocate the function call
to acct_update_integrals into the function that updates stime, which is
account_system_time, and remove it from the vm code paths.

update_mem_hiwater finds the rss hiwater mark.  We call that from timer
context as well.  This means that processes' high-water marks are now
sampled statistically, at timer-interrupt time, rather than
deterministically.  This may or may not be a problem.

This means that the rss limit is not always updated if rss is increased
and thus not as accurate. But the benefit is that the rss checks do not
pollute the vm paths and that it is consistent with the rss limit check.

The following patch removes acct_update_integrals and update_mem_hiwater
from the hot vm paths.
Signed-off-by: Christoph Lameter <clameter@sgi.com>

From: Jay Lan <jlan@sgi.com>

The new "move-accounting-function-calls-out-of-critical-vm-code-paths"
patch in 2.6.11-rc3-mm2 was different from the code I tested.

In particular, it mistakenly dropped the accounting routine calls
in fs/exec.c. The calls in do_execve() are needed to properly
initialize accounting fields. Specifically, the tsk->acct_stimexpd
needs to be initialized to tsk->stime.

I have discussed this with Christoph Lameter and he gave me full
blessings to bring the calls back.
Signed-off-by: Jay Lan <jlan@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent bf57909f
......@@ -1193,8 +1193,8 @@ int do_execve(char * filename,
/* execve success */
security_bprm_free(bprm);
acct_update_integrals();
update_mem_hiwater();
acct_update_integrals(current);
update_mem_hiwater(current);
kfree(bprm);
return retval;
}
......
......@@ -120,12 +120,12 @@ struct acct_v3
struct super_block;
extern void acct_auto_close(struct super_block *sb);
extern void acct_process(long exitcode);
extern void acct_update_integrals(void);
extern void acct_update_integrals(struct task_struct *tsk);
extern void acct_clear_integrals(struct task_struct *tsk);
#else
#define acct_auto_close(x) do { } while (0)
#define acct_process(x) do { } while (0)
#define acct_update_integrals() do { } while (0)
#define acct_update_integrals(x) do { } while (0)
#define acct_clear_integrals(task) do { } while (0)
#endif
......
......@@ -838,7 +838,7 @@ static inline void vm_stat_unaccount(struct vm_area_struct *vma)
}
/* update per process rss and vm hiwater data */
extern void update_mem_hiwater(void);
extern void update_mem_hiwater(struct task_struct *tsk);
#ifndef CONFIG_DEBUG_PAGEALLOC
static inline void
......
......@@ -534,10 +534,8 @@ void acct_process(long exitcode)
* acct_update_integrals
* - update mm integral fields in task_struct
*/
void acct_update_integrals(void)
void acct_update_integrals(struct task_struct *tsk)
{
struct task_struct *tsk = current;
if (likely(tsk->mm)) {
long delta = tsk->stime - tsk->acct_stimexpd;
......
......@@ -806,8 +806,8 @@ fastcall NORET_TYPE void do_exit(long code)
current->comm, current->pid,
preempt_count());
acct_update_integrals();
update_mem_hiwater();
acct_update_integrals(tsk);
update_mem_hiwater(tsk);
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead)
acct_process(code);
......
......@@ -45,6 +45,7 @@
#include <linux/seq_file.h>
#include <linux/syscalls.h>
#include <linux/times.h>
#include <linux/acct.h>
#include <asm/tlb.h>
#include <asm/unistd.h>
......@@ -2379,6 +2380,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
else
cpustat->idle = cputime64_add(cpustat->idle, tmp);
/* Account for system time used */
acct_update_integrals(p);
/* Update rss highwater mark */
update_mem_hiwater(p);
}
/*
......
......@@ -46,7 +46,6 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/acct.h>
#include <linux/module.h>
#include <linux/init.h>
......@@ -735,7 +734,6 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address,
tlb = tlb_gather_mmu(mm, 0);
unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
tlb_finish_mmu(tlb, address, end);
acct_update_integrals();
spin_unlock(&mm->page_table_lock);
}
......@@ -1338,11 +1336,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
if (likely(pte_same(*page_table, pte))) {
if (PageAnon(old_page))
mm->anon_rss--;
if (PageReserved(old_page)) {
if (PageReserved(old_page))
++mm->rss;
acct_update_integrals();
update_mem_hiwater();
} else
else
page_remove_rmap(old_page);
break_cow(vma, new_page, address, page_table);
lru_cache_add_active(new_page);
......@@ -1747,9 +1743,6 @@ static int do_swap_page(struct mm_struct * mm,
remove_exclusive_swap_page(page);
mm->rss++;
acct_update_integrals();
update_mem_hiwater();
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && can_share_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
......@@ -1814,8 +1807,6 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out;
}
mm->rss++;
acct_update_integrals();
update_mem_hiwater();
entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
vma->vm_page_prot)),
vma);
......@@ -1932,8 +1923,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte_none(*page_table)) {
if (!PageReserved(new_page))
++mm->rss;
acct_update_integrals();
update_mem_hiwater();
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
......@@ -2253,10 +2242,8 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
* update_mem_hiwater
* - update per process rss and vm high water data
*/
void update_mem_hiwater(void)
void update_mem_hiwater(struct task_struct *tsk)
{
struct task_struct *tsk = current;
if (tsk->mm) {
if (tsk->mm->hiwater_rss < tsk->mm->rss)
tsk->mm->hiwater_rss = tsk->mm->rss;
......
......@@ -21,7 +21,6 @@
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/acct.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
......@@ -1121,8 +1120,6 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
pgoff, flags & MAP_NONBLOCK);
down_write(&mm->mmap_sem);
}
acct_update_integrals();
update_mem_hiwater();
return addr;
unmap_and_free_vma:
......@@ -1463,8 +1460,6 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
if (vma->vm_flags & VM_LOCKED)
mm->locked_vm += grow;
__vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
acct_update_integrals();
update_mem_hiwater();
return 0;
}
......@@ -1968,8 +1963,6 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
mm->locked_vm += len >> PAGE_SHIFT;
make_pages_present(addr, addr + len);
}
acct_update_integrals();
update_mem_hiwater();
return addr;
}
......
......@@ -16,7 +16,6 @@
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/acct.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
......@@ -255,9 +254,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
new_addr + new_len);
}
acct_update_integrals();
update_mem_hiwater();
return new_addr;
}
......@@ -394,8 +390,6 @@ unsigned long do_mremap(unsigned long addr,
make_pages_present(addr + old_len,
addr + new_len);
}
acct_update_integrals();
update_mem_hiwater();
ret = addr;
goto out;
}
......
......@@ -959,10 +959,8 @@ void arch_unmap_area(struct vm_area_struct *area)
{
}
void update_mem_hiwater(void)
void update_mem_hiwater(struct task_struct *tsk)
{
struct task_struct *tsk = current;
if (likely(tsk->mm)) {
if (tsk->mm->hiwater_rss < tsk->mm->rss)
tsk->mm->hiwater_rss = tsk->mm->rss;
......
......@@ -51,7 +51,6 @@
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/acct.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
......@@ -600,7 +599,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
}
mm->rss--;
acct_update_integrals();
page_remove_rmap(page);
page_cache_release(page);
......@@ -705,7 +703,6 @@ static void try_to_unmap_cluster(unsigned long cursor,
page_remove_rmap(page);
page_cache_release(page);
acct_update_integrals();
mm->rss--;
(*mapcount)--;
}
......
......@@ -24,7 +24,6 @@
#include <linux/module.h>
#include <linux/rmap.h>
#include <linux/security.h>
#include <linux/acct.h>
#include <linux/backing-dev.h>
#include <linux/syscalls.h>
......@@ -437,8 +436,6 @@ unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
page_add_anon_rmap(page, vma, address);
swap_free(entry);
acct_update_integrals();
update_mem_hiwater();
}
/* vma->vm_mm->page_table_lock is held */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment