Commit e6a9fb1e authored by Alexander Viro, committed by Linus Torvalds

[PATCH] symlink 1/9: infrastructure and explanation

This patch-kit gets past the limit on nested symlinks, without
incompatible API changes _and_ with killing code duplication in most of
the readlink/follow_link pairs.  And no, it's not the old ->getlink()
crap - procfs et al. are not special-cased there.

Here's how it works:
 * ->follow_link() still does what it used to do - replaces
   vfsmount/dentry in the nameidata it got from caller.  However, it can
   also leave a pathname to be resolved by caller. 
 * we add an array of char * into nameidata; we always work with
   nd->saved_names[nd->depth].  nd_set_link() sets it,
   nd_get_link() returns it.
 * callers of ->follow_link() (all two of them) check if ->follow_link()
   had left us something to do.  If it had (return value was zero and
   nd_get_link() is non-NULL), they do __vfs_follow_link() on that name.
   Then they call a new method (->put_link()) that frees whatever has to
   be freed, etc. 

Note that the absolute majority of symlinks have "resolve a pathname" as
part of their ->follow_link(); they can do something else and some don't
do that at all, but having that pathname resolution is very, very
common. 

With that change we allow them to shift pathname resolution part to
caller.  They don't have to - it's perfectly OK to do all work in
->follow_link().  However, leaving the pathname resolution to caller
will
   a) exclude foo_follow_link() stack frame from the picture
   b) kill 2 stack frames - all callers are in fs/namei.c
and they can use inlined variant of vfs_follow_link().

That reduction of stack use is enough to push the limit on nested
symlinks from 5 to 8 (actually, even beyond that, but since 8 is common
for other Unices it will do fine). 

For those who have "pure" ->follow_link() (i.e.  "find a string that
would be symlink contents and say nd_set_link(nd, string)") we also get
a common helper implementing ->readlink() - it just calls
->follow_link() on a dummy nameidata, calls vfs_readlink() on result of
nd_get_link() and does ->put_link().  Using (or not using) it is up to
filesystem; it's a helper that can be used as a ->readlink() for many
filesystems, not a reimplementation of sys_readlink().  However, that's
_MANY_ filesystems - practically all of them. 

Note that we don't put any crap like "if this is a normal symlink, do
this; otherwise call ->follow_link() and let it do its magic" into
callers - all symlinks are handled the same way.  Which was the main
problem with getlink proposal back then. 

That covers almost everything; the only cases left are nfs, ncpfs and
cifs.  Those will go later - we are backwards compatible, so it's not a
problem. 

First patch: infrastructure - helpers allowing ->follow_link() to leave
a pathname to be traversed by caller + corresponding code in callers. 
parent feb70892
...@@ -395,6 +395,8 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s ...@@ -395,6 +395,8 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
return result; return result;
} }
static inline int __vfs_follow_link(struct nameidata *, const char *);
/* /*
* This limits recursive symlink follows to 8, while * This limits recursive symlink follows to 8, while
* limiting consecutive symlinks to 40. * limiting consecutive symlinks to 40.
...@@ -405,19 +407,30 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s ...@@ -405,19 +407,30 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{ {
int err = -ELOOP; int err = -ELOOP;
if (current->link_count >= 5) if (current->link_count >= MAX_NESTED_LINKS)
goto loop; goto loop;
if (current->total_link_count >= 40) if (current->total_link_count >= 40)
goto loop; goto loop;
BUG_ON(nd->depth >= MAX_NESTED_LINKS);
cond_resched(); cond_resched();
err = security_inode_follow_link(dentry, nd); err = security_inode_follow_link(dentry, nd);
if (err) if (err)
goto loop; goto loop;
current->link_count++; current->link_count++;
current->total_link_count++; current->total_link_count++;
nd->depth++;
touch_atime(nd->mnt, dentry); touch_atime(nd->mnt, dentry);
nd_set_link(nd, NULL);
err = dentry->d_inode->i_op->follow_link(dentry, nd); err = dentry->d_inode->i_op->follow_link(dentry, nd);
if (!err) {
char *s = nd_get_link(nd);
if (s)
err = __vfs_follow_link(nd, s);
if (dentry->d_inode->i_op->put_link)
dentry->d_inode->i_op->put_link(dentry, nd);
}
current->link_count--; current->link_count--;
nd->depth--;
return err; return err;
loop: loop:
path_release(nd); path_release(nd);
...@@ -587,7 +600,7 @@ int fastcall link_path_walk(const char * name, struct nameidata *nd) ...@@ -587,7 +600,7 @@ int fastcall link_path_walk(const char * name, struct nameidata *nd)
goto return_reval; goto return_reval;
inode = nd->dentry->d_inode; inode = nd->dentry->d_inode;
if (current->link_count) if (nd->depth)
lookup_flags = LOOKUP_FOLLOW; lookup_flags = LOOKUP_FOLLOW;
/* At this point we know we have a real path component. */ /* At this point we know we have a real path component. */
...@@ -795,6 +808,7 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd) ...@@ -795,6 +808,7 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
*/ */
nd_root.last_type = LAST_ROOT; nd_root.last_type = LAST_ROOT;
nd_root.flags = nd->flags; nd_root.flags = nd->flags;
nd_root.depth = 0;
memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent)); memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent));
read_lock(&current->fs->lock); read_lock(&current->fs->lock);
nd_root.mnt = mntget(current->fs->rootmnt); nd_root.mnt = mntget(current->fs->rootmnt);
...@@ -867,6 +881,7 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata ...@@ -867,6 +881,7 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata
nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags; nd->flags = flags;
nd->depth = 0;
read_lock(&current->fs->lock); read_lock(&current->fs->lock);
if (*name=='/') { if (*name=='/') {
...@@ -1385,7 +1400,15 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) ...@@ -1385,7 +1400,15 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
if (error) if (error)
goto exit_dput; goto exit_dput;
touch_atime(nd->mnt, dentry); touch_atime(nd->mnt, dentry);
nd_set_link(nd, NULL);
error = dentry->d_inode->i_op->follow_link(dentry, nd); error = dentry->d_inode->i_op->follow_link(dentry, nd);
if (!error) {
char *s = nd_get_link(nd);
if (s)
error = __vfs_follow_link(nd, s);
if (dentry->d_inode->i_op->put_link)
dentry->d_inode->i_op->put_link(dentry, nd);
}
dput(dentry); dput(dentry);
if (error) if (error)
return error; return error;
...@@ -2182,7 +2205,7 @@ __vfs_follow_link(struct nameidata *nd, const char *link) ...@@ -2182,7 +2205,7 @@ __vfs_follow_link(struct nameidata *nd, const char *link)
} }
res = link_path_walk(link, nd); res = link_path_walk(link, nd);
out: out:
if (current->link_count || res || nd->last_type!=LAST_NORM) if (nd->depth || res || nd->last_type!=LAST_NORM)
return res; return res;
/* /*
* If it is an iterative symlinks resolution in open_namei() we * If it is an iterative symlinks resolution in open_namei() we
......
...@@ -902,6 +902,7 @@ struct inode_operations { ...@@ -902,6 +902,7 @@ struct inode_operations {
struct inode *, struct dentry *); struct inode *, struct dentry *);
int (*readlink) (struct dentry *, char __user *,int); int (*readlink) (struct dentry *, char __user *,int);
int (*follow_link) (struct dentry *, struct nameidata *); int (*follow_link) (struct dentry *, struct nameidata *);
void (*put_link) (struct dentry *, struct nameidata *);
void (*truncate) (struct inode *); void (*truncate) (struct inode *);
int (*permission) (struct inode *, int, struct nameidata *); int (*permission) (struct inode *, int, struct nameidata *);
int (*setattr) (struct dentry *, struct iattr *); int (*setattr) (struct dentry *, struct iattr *);
......
...@@ -10,12 +10,16 @@ struct open_intent { ...@@ -10,12 +10,16 @@ struct open_intent {
int create_mode; int create_mode;
}; };
/*
 * Bound on symlink nesting.  do_follow_link() refuses to follow another
 * link once current->link_count reaches this value, and struct nameidata
 * sizes its saved_names array as MAX_NESTED_LINKS + 1 slots.
 */
enum { MAX_NESTED_LINKS = 5 };
struct nameidata { struct nameidata {
struct dentry *dentry; struct dentry *dentry;
struct vfsmount *mnt; struct vfsmount *mnt;
struct qstr last; struct qstr last;
unsigned int flags; unsigned int flags;
int last_type; int last_type;
unsigned depth;
char *saved_names[MAX_NESTED_LINKS + 1];
/* Intent data */ /* Intent data */
union { union {
...@@ -67,4 +71,14 @@ extern int follow_up(struct vfsmount **, struct dentry **); ...@@ -67,4 +71,14 @@ extern int follow_up(struct vfsmount **, struct dentry **);
extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename(struct dentry *, struct dentry *);
extern void unlock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *);
/*
 * nd_set_link - record a link body for the current nesting level.
 * @nd:   lookup state to update
 * @path: string to store; callers pass NULL to clear the slot
 *
 * Stores @path in the saved_names slot selected by nd->depth.
 */
static inline void nd_set_link(struct nameidata *nd, char *path)
{
	char **slot = &nd->saved_names[nd->depth];

	*slot = path;
}
/*
 * nd_get_link - fetch the link body recorded for the current nesting level.
 * @nd: lookup state to read
 *
 * Returns whatever is stored in the saved_names slot selected by nd->depth.
 * The callers of ->follow_link() treat a NULL result as "nothing left for
 * the caller to resolve".
 */
static inline char *nd_get_link(struct nameidata *nd)
{
	char **slot = &nd->saved_names[nd->depth];

	return *slot;
}
#endif /* _LINUX_NAMEI_H */ #endif /* _LINUX_NAMEI_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment