Commit 3faa61fe authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] ipc semaphore optimization

From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>

This patch proposes a performance fix for the current IPC semaphore
implementation.

There are two shortcomings in the current implementation:
try_atomic_semop() was called two times to wake up a blocked process,
once from update_queue() (executed by the process that wakes up
the sleeping process) and once in the retry part of the blocked process
(executed by the blocked process itself after it gets woken up).

A second issue is that when several sleeping processes are eligible
for wake-up, they are woken up in a daisy-chain formation, each one in
turn waking up the next process in line.  However, every time a process
wakes up, it starts scanning the wait queue from the beginning, not from
where the scan last left off.  This causes a large number of unnecessary
scans of the wait queue when the queue is deep.  Blocked processes come
and go, but chances are there are still quite a few blocked processes
sitting at the beginning of that queue.

What we are proposing here is to merge the portion of the code in the
bottom part of sys_semtimedop() (code that gets executed when a sleeping
process gets woken up) into the update_queue() function.  The benefit is
twofold: (1) it reduces redundant calls to try_atomic_semop(), and (2) it
increases the efficiency of finding eligible processes to wake up and
allows higher concurrency for multiple wake-ups.

We have measured that this patch improves throughput for a large
application significantly on an industry-standard benchmark.

This patch is relative to 2.5.72.  Any feedback is very much
appreciated.

Some kernel profile data attached:

  Kernel profile before optimization:
  -----------------------------------------------
                0.05    0.14   40805/529060      sys_semop [133]
                0.55    1.73  488255/529060      ia64_ret_from_syscall
[2]
[52]     2.5    0.59    1.88  529060         sys_semtimedop [52]
                0.05    0.83  477766/817966      schedule_timeout [62]
                0.34    0.46  529064/989340      update_queue [61]
                0.14    0.00 1006740/6473086     try_atomic_semop [75]
                0.06    0.00  529060/989336      ipcperms [149]
  -----------------------------------------------

                0.30    0.40  460276/989340      semctl_main [68]
                0.34    0.46  529064/989340      sys_semtimedop [52]
[61]     1.5    0.64    0.87  989340         update_queue [61]
                0.75    0.00 5466346/6473086     try_atomic_semop [75]
                0.01    0.11  477676/576698      wake_up_process [146]
  -----------------------------------------------
                0.14    0.00 1006740/6473086     sys_semtimedop [52]
                0.75    0.00 5466346/6473086     update_queue [61]
[75]     0.9    0.89    0.00 6473086         try_atomic_semop [75]
  -----------------------------------------------

  Kernel profile with optimization:

  -----------------------------------------------
                0.03    0.05   26139/503178      sys_semop [155]
                0.46    0.92  477039/503178      ia64_ret_from_syscall
[2]
[61]     1.2    0.48    0.97  503178         sys_semtimedop [61]
                0.04    0.79  470724/784394      schedule_timeout [62]
                0.05    0.00  503178/3301773     try_atomic_semop [109]
                0.05    0.00  503178/930934      ipcperms [149]
                0.00    0.03   32454/460210      update_queue [99]
  -----------------------------------------------
                0.00    0.03   32454/460210      sys_semtimedop [61]
                0.06    0.36  427756/460210      semctl_main [75]
[99]     0.4    0.06    0.39  460210         update_queue [99]
                0.30    0.00 2798595/3301773     try_atomic_semop [109]
                0.00    0.09  470630/614097      wake_up_process [146]
  -----------------------------------------------
                0.05    0.00  503178/3301773     sys_semtimedop [61]
                0.30    0.00 2798595/3301773     update_queue [99]
[109]    0.3    0.35    0.00 3301773         try_atomic_semop [109]
  -----------------------------------------------

Both number of function calls to try_atomic_semop() and update_queue()
are reduced by 50% as a result of the merge.  Execution time of
sys_semtimedop is reduced because of the reduction in the low level
functions.
parent d8d90b60
...@@ -109,7 +109,6 @@ struct sem_queue { ...@@ -109,7 +109,6 @@ struct sem_queue {
int id; /* internal sem id */ int id; /* internal sem id */
struct sembuf * sops; /* array of pending operations */ struct sembuf * sops; /* array of pending operations */
int nsops; /* number of operations */ int nsops; /* number of operations */
int alter; /* operation will alter semaphore */
}; };
/* Each task has a list of undo requests. They are executed automatically /* Each task has a list of undo requests. They are executed automatically
......
...@@ -49,6 +49,10 @@ ...@@ -49,6 +49,10 @@
* increase. If there are decrement operations in the operations * increase. If there are decrement operations in the operations
* array we do the same as before. * array we do the same as before.
* *
* With the incarnation of O(1) scheduler, it becomes unnecessary to perform
* check/retry algorithm for waking up blocked processes as the new scheduler
* is better at handling thread switch than the old one.
*
* /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com> * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
* *
* SMP-threaded, sysctl's added * SMP-threaded, sysctl's added
...@@ -258,8 +262,7 @@ static inline void remove_from_queue (struct sem_array * sma, ...@@ -258,8 +262,7 @@ static inline void remove_from_queue (struct sem_array * sma,
*/ */
static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
int nsops, struct sem_undo *un, int pid, int nsops, struct sem_undo *un, int pid)
int do_undo)
{ {
int result, sem_op; int result, sem_op;
struct sembuf *sop; struct sembuf *sop;
...@@ -289,10 +292,6 @@ static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, ...@@ -289,10 +292,6 @@ static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
curr->semval = result; curr->semval = result;
} }
if (do_undo) {
result = 0;
goto undo;
}
sop--; sop--;
while (sop >= sops) { while (sop >= sops) {
sma->sem_base[sop->sem_num].sempid = pid; sma->sem_base[sop->sem_num].sempid = pid;
...@@ -334,23 +333,14 @@ static void update_queue (struct sem_array * sma) ...@@ -334,23 +333,14 @@ static void update_queue (struct sem_array * sma)
for (q = sma->sem_pending; q; q = q->next) { for (q = sma->sem_pending; q; q = q->next) {
if (q->status == 1)
continue; /* this one was woken up before */
error = try_atomic_semop(sma, q->sops, q->nsops, error = try_atomic_semop(sma, q->sops, q->nsops,
q->undo, q->pid, q->alter); q->undo, q->pid);
/* Does q->sleeper still need to sleep? */ /* Does q->sleeper still need to sleep? */
if (error <= 0) { if (error <= 0) {
/* Found one, wake it up */
wake_up_process(q->sleeper);
if (error == 0 && q->alter) {
/* if q-> alter let it self try */
q->status = 1;
return;
}
q->status = error; q->status = error;
remove_from_queue(sma,q); remove_from_queue(sma,q);
wake_up_process(q->sleeper);
} }
} }
} }
...@@ -1062,7 +1052,7 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf *tsops, ...@@ -1062,7 +1052,7 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf *tsops,
if (error) if (error)
goto out_unlock_free; goto out_unlock_free;
error = try_atomic_semop (sma, sops, nsops, un, current->pid, 0); error = try_atomic_semop (sma, sops, nsops, un, current->pid);
if (error <= 0) if (error <= 0)
goto update; goto update;
...@@ -1075,55 +1065,46 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf *tsops, ...@@ -1075,55 +1065,46 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf *tsops,
queue.nsops = nsops; queue.nsops = nsops;
queue.undo = un; queue.undo = un;
queue.pid = current->pid; queue.pid = current->pid;
queue.alter = decrease;
queue.id = semid; queue.id = semid;
if (alter) if (alter)
append_to_queue(sma ,&queue); append_to_queue(sma ,&queue);
else else
prepend_to_queue(sma ,&queue); prepend_to_queue(sma ,&queue);
for (;;) { queue.status = -EINTR;
queue.status = -EINTR; queue.sleeper = current;
queue.sleeper = current; current->state = TASK_INTERRUPTIBLE;
current->state = TASK_INTERRUPTIBLE; sem_unlock(sma);
sem_unlock(sma);
if (timeout) if (timeout)
jiffies_left = schedule_timeout(jiffies_left); jiffies_left = schedule_timeout(jiffies_left);
else else
schedule(); schedule();
sma = sem_lock(semid); sma = sem_lock(semid);
if(sma==NULL) { if(sma==NULL) {
if(queue.prev != NULL) if(queue.prev != NULL)
BUG(); BUG();
error = -EIDRM; error = -EIDRM;
goto out_free; goto out_free;
} }
/*
* If queue.status == 1 we where woken up and /*
* have to retry else we simply return. * If queue.status != -EINTR we are woken up by another process
* If an interrupt occurred we have to clean up the */
* queue error = queue.status;
* if (queue.status != -EINTR) {
*/ goto out_unlock_free;
if (queue.status == 1)
{
error = try_atomic_semop (sma, sops, nsops, un,
current->pid,0);
if (error <= 0)
break;
} else {
error = queue.status;
if (error == -EINTR && timeout && jiffies_left == 0)
error = -EAGAIN;
if (queue.prev) /* got Interrupt */
break;
/* Everything done by update_queue */
goto out_unlock_free;
}
} }
/*
* If an interrupt occurred we have to clean up the queue
*/
if (timeout && jiffies_left == 0)
error = -EAGAIN;
remove_from_queue(sma,&queue); remove_from_queue(sma,&queue);
goto out_unlock_free;
update: update:
if (alter) if (alter)
update_queue (sma); update_queue (sma);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment