Btrfs: add and improve comments

This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. extent-tree.c and volumes.c were both skipped, and there is definitely more work todo in cleaning and commenting the code. Signed-off-by: Chris Mason <chris.mason@oracle.com>

Btrfs: add and improve comments
This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. extent-tree.c and volumes.c were both skipped, and there is definitely more work todo in cleaning and commenting the code. Signed-off-by: Chris Mason <chris.mason@oracle.com>
d352ac68 · Chris Mason · 9a5e1ea1 · d352ac68 · 9a5e1ea1 · d352ac68
Commit d352ac68 authored Sep 29, 2008 by Chris Mason
25 changed files
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),)
 obj-m  := btrfs.o
 btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   file-item.o inode-item.o inode-map.o disk-io.o \
-	   transaction.o bit-radix.o inode.o file.o tree-defrag.o \
+	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o

--- a/fs/btrfs/TODO
+++ b/fs/btrfs/TODO
-* cleanup, add more error checking, get rid of BUG_ONs
-* Fix ENOSPC handling
-* Make allocator smarter
-* add a block group to struct inode
-* Do actual block accounting
-* Check compat and incompat flags on the inode
-* Get rid of struct ctree_path, limiting tree levels held at one time
-* Add generation number to key pointer in nodes
-* Add generation number to inode
-* forbid cross subvolume renames and hardlinks
-* Release
-* Do real tree locking
-* Add extent mirroring (backup copies of blocks)
-* Add fancy interface to get access to incremental backups
-* Add fancy striped extents to make big reads faster
-* Use relocation to try and fix write errors
-* Make allocator much smarter
-* xattrs (directory streams for regular files)
-* Scrub & defrag
-
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -231,17 +231,25 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)

 	/*
 	 * if we pick a busy task, move the task to the end of the list.
-	 * hopefully this will keep things somewhat evenly balanced
+	 * hopefully this will keep things somewhat evenly balanced.
+	 * Do the move in batches based on the sequence number.  This groups
+	 * requests submitted at roughly the same time onto the same worker.
 	 */
 	next = workers->worker_list.next;
 	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
 	atomic_inc(&worker->num_pending);
 	worker->sequence++;
+
 	if (worker->sequence % workers->idle_thresh == 0)
 		list_move_tail(next, &workers->worker_list);
 	return worker;
 }

+/*
+ * selects a worker thread to take the next job.  This will either find
+ * an idle worker, start a new worker up to the max count, or just return
+ * one of the existing busy workers.
+ */
 static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
 {
 	struct btrfs_worker_thread *worker;

--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -63,14 +63,17 @@ struct btrfs_workers {
 	/* once a worker has this many requests or fewer, it is idle */
 	int idle_thresh;

-	/* list with all the work threads */
+	/* list with all the work threads.  The workers on the idle thread
+	 * may be actively servicing jobs, but they haven't yet hit the
+	 * idle thresh limit above.
+	 */
 	struct list_head worker_list;
 	struct list_head idle_list;

 	/* lock for finding the next worker thread to queue on */
 	spinlock_t lock;

-	/* extra name for this worker */
+	/* extra name for this worker, used for current->name */
 	char *name;
 };


--- a/fs/btrfs/bit-radix.c
+++ b/fs/btrfs/bit-radix.c
-/*
- * Copyright (C) 2007 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "bit-radix.h"
-
-#define BIT_ARRAY_BYTES 256
-#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8)
-
-extern struct kmem_cache *btrfs_bit_radix_cachep;
-int set_radix_bit(struct radix_tree_root *radix, unsigned long bit)
-{
-	unsigned long *bits;
-	unsigned long slot;
-	int bit_slot;
-	int ret;
-
-	slot = bit / BIT_RADIX_BITS_PER_ARRAY;
-	bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
-
-	bits = radix_tree_lookup(radix, slot);
-	if (!bits) {
-		bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS);
-		if (!bits)
-			return -ENOMEM;
-		memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long));
-		bits[0] = slot;
-		ret = radix_tree_insert(radix, slot, bits);
-		if (ret)
-			return ret;
-	}
-	ret = test_and_set_bit(bit_slot, bits + 1);
-	if (ret < 0)
-		ret = 1;
-	return ret;
-}
-
-int test_radix_bit(struct radix_tree_root *radix, unsigned long bit)
-{
-	unsigned long *bits;
-	unsigned long slot;
-	int bit_slot;
-
-	slot = bit / BIT_RADIX_BITS_PER_ARRAY;
-	bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
-
-	bits = radix_tree_lookup(radix, slot);
-	if (!bits)
-		return 0;
-	return test_bit(bit_slot, bits + 1);
-}
-
-int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit)
-{
-	unsigned long *bits;
-	unsigned long slot;
-	int bit_slot;
-	int i;
-	int empty = 1;
-
-	slot = bit / BIT_RADIX_BITS_PER_ARRAY;
-	bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
-
-	bits = radix_tree_lookup(radix, slot);
-	if (!bits)
-		return 0;
-	clear_bit(bit_slot, bits + 1);
-	for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) {
-		if (bits[i]) {
-			empty = 0;
-			break;
-		}
-	}
-	if (empty) {
-		bits = radix_tree_delete(radix, slot);
-		BUG_ON(!bits);
-		kmem_cache_free(btrfs_bit_radix_cachep, bits);
-	}
-	return 0;
-}
-
-int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
-			 unsigned long start, int nr)
-{
-	unsigned long *bits;
-	unsigned long *gang[4];
-	int found;
-	int ret;
-	int i;
-	int total_found = 0;
-	unsigned long slot;
-
-	slot = start / BIT_RADIX_BITS_PER_ARRAY;
-	ret = radix_tree_gang_lookup(radix, (void **)gang, slot,
-				     ARRAY_SIZE(gang));
-	found = start % BIT_RADIX_BITS_PER_ARRAY;
-	for (i = 0; i < ret && nr > 0; i++) {
-		bits = gang[i];
-		while(nr > 0) {
-			found = find_next_bit(bits + 1,
-					      BIT_RADIX_BITS_PER_ARRAY,
-					      found);
-			if (found < BIT_RADIX_BITS_PER_ARRAY) {
-				*retbits = bits[0] *
-					BIT_RADIX_BITS_PER_ARRAY + found;
-				retbits++;
-				nr--;
-				total_found++;
-				found++;
-			} else
-				break;
-		}
-		found = 0;
-	}
-	return total_found;
-}
--- a/fs/btrfs/bit-radix.h
+++ b/fs/btrfs/bit-radix.h
-/*
- * Copyright (C) 2007 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BIT_RADIX__
-#define __BIT_RADIX__
-#include <linux/radix-tree.h>
-
-int set_radix_bit(struct radix_tree_root *radix, unsigned long bit);
-int test_radix_bit(struct radix_tree_root *radix, unsigned long bit);
-int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit);
-int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
-			 unsigned long start, int nr);
-
-static inline void init_bit_radix(struct radix_tree_root *radix)
-{
-	INIT_RADIX_TREE(radix, GFP_NOFS);
-}
-#endif
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -25,27 +25,58 @@

 /* in memory btrfs inode */
 struct btrfs_inode {
+	/* which subvolume this inode belongs to */
 	struct btrfs_root *root;
+
+	/* the block group preferred for allocations.  This pointer is buggy
+	 * and needs to be replaced with a bytenr instead
+	 */
 	struct btrfs_block_group_cache *block_group;
+
+	/* key used to find this inode on disk.  This is used by the code
+	 * to read in roots of subvolumes
+	 */
 	struct btrfs_key location;
+
+	/* the extent_tree has caches of all the extent mappings to disk */
 	struct extent_map_tree extent_tree;
+
+	/* the io_tree does range state (DIRTY, LOCKED etc) */
 	struct extent_io_tree io_tree;
+
+	/* special utility tree used to record which mirrors have already been
+	 * tried when checksums fail for a given block
+	 */
 	struct extent_io_tree io_failure_tree;
+
+	/* held while inserting checksums to avoid races */
 	struct mutex csum_mutex;
+
+	/* held while inesrting or deleting extents from files */
 	struct mutex extent_mutex;
+
+	/* held while logging the inode in tree-log.c */
 	struct mutex log_mutex;
-	struct inode vfs_inode;
+
+	/* used to order data wrt metadata */
 	struct btrfs_ordered_inode_tree ordered_tree;

+	/* standard acl pointers */
 	struct posix_acl *i_acl;
 	struct posix_acl *i_default_acl;

 	/* for keeping track of orphaned inodes */
 	struct list_head i_orphan;

+	/* list of all the delalloc inodes in the FS.  There are times we need
+	 * to write all the delalloc pages to disk, and this list is used
+	 * to walk them all.
+	 */
 	struct list_head delalloc_inodes;

-	/* full 64 bit generation number */
+	/* full 64 bit generation number, struct vfs_inode doesn't have a big
+	 * enough field for this.
+	 */
 	u64 generation;

 	/*
@@ -57,10 +88,25 @@ struct btrfs_inode {
 	 */
 	u64 logged_trans;

-	/* trans that last made a change that should be fully fsync'd */
+	/*
+	 * trans that last made a change that should be fully fsync'd.  This
+	 * gets reset to zero each time the inode is logged
+	 */
 	u64 log_dirty_trans;
+
+	/* total number of bytes pending delalloc, used by stat to calc the
+	 * real block usage of the file
+	 */
 	u64 delalloc_bytes;
+
+	/*
+	 * the size of the file stored in the metadata on disk.  data=ordered
+	 * means the in-memory i_size might be larger than the size on disk
+	 * because not all the blocks are written yet.
+	 */
 	u64 disk_i_size;
+
+	/* flags field from the on disk inode */
 	u32 flags;

 	/*
@@ -68,6 +114,8 @@ struct btrfs_inode {
 	 * number for new files that are created
 	 */
 	u64 index_cnt;
+
+	struct inode vfs_inode;
 };

 static inline struct btrfs_inode *BTRFS_I(struct inode *inode)

--- a/fs/btrfs/crc32c.h
+++ b/fs/btrfs/crc32c.h
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
 #ifndef __BTRFS_CRC32C__
 #define __BTRFS_CRC32C__
 #include <asm/byteorder.h>

--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -27,7 +27,6 @@
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
 #include <asm/kmap_types.h>
-#include "bit-radix.h"
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"

--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,6 +21,14 @@
 #include "hash.h"
 #include "transaction.h"

+/*
+ * insert a name into a directory, doing overflow properly if there is a hash
+ * collision.  data_size indicates how big the item inserted should be.  On
+ * success a struct btrfs_dir_item pointer is returned, otherwise it is
+ * an ERR_PTR.
+ *
+ * The name is not copied into the dir item, you have to do that yourself.
+ */
 static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
 						   *trans,
 						   struct btrfs_root *root,
@@ -55,6 +63,10 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
 	return (struct btrfs_dir_item *)ptr;
 }

+/*
+ * xattrs work a lot like directories, this inserts an xattr item
+ * into the tree
+ */
 int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, const char *name,
 			    u16 name_len, const void *data, u16 data_len,
@@ -109,6 +121,13 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
 	return ret;
 }

+/*
+ * insert a directory item in the tree, doing all the magic for
+ * both indexes. 'dir' indicates which objectid to insert it into,
+ * 'location' is the key to stuff into the directory item, 'type' is the
+ * type of the inode we're pointing to, and 'index' is the sequence number
+ * to use for the second index (if one is created).
+ */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
 			  *root, const char *name, int name_len, u64 dir,
 			  struct btrfs_key *location, u8 type, u64 index)
@@ -184,6 +203,11 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
 	return 0;
 }

+/*
+ * lookup a directory item based on name.  'dir' is the objectid
+ * we're searching in, and 'mod' tells us if you plan on deleting the
+ * item (use mod < 0) or changing the options (use mod > 0)
+ */
 struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
 					     struct btrfs_root *root,
 					     struct btrfs_path *path, u64 dir,
@@ -222,6 +246,14 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
 	return btrfs_match_dir_item_name(root, path, name, name_len);
 }

+/*
+ * lookup a directory item based on index.  'dir' is the objectid
+ * we're searching in, and 'mod' tells us if you plan on deleting the
+ * item (use mod < 0) or changing the options (use mod > 0)
+ *
+ * The name is used to make sure the index really points to the name you were
+ * looking for.
+ */
 struct btrfs_dir_item *
 btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root,
@@ -282,6 +314,11 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 	return btrfs_match_dir_item_name(root, path, name, name_len);
 }

+/*
+ * helper function to look at the directory item pointed to by 'path'
+ * this walks through all the entries in a dir item and finds one
+ * for a specific name.
+ */
 struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
 			      struct btrfs_path *path,
 			      const char *name, int name_len)
@@ -313,6 +350,10 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
 	return NULL;
 }

+/*
+ * given a pointer into a directory item, delete it.  This
+ * handles items that have more than one entry in them.
+ */
 int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      struct btrfs_path *path,

--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -55,6 +55,11 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);

+/*
+ * end_io_wq structs are used to do processing in task context when an IO is
+ * complete.  This is used during reads to verify checksums, and it is used
+ * by writes to insert metadata for new file extents after IO is complete.
+ */
 struct end_io_wq {
 	struct bio *bio;
 	bio_end_io_t *end_io;
@@ -66,6 +71,11 @@ struct end_io_wq {
 	struct btrfs_work work;
 };

+/*
+ * async submit bios are used to offload expensive checksumming
+ * onto the worker threads.  They checksum file and metadata bios
+ * just before they are sent down the IO stack.
+ */
 struct async_submit_bio {
 	struct inode *inode;
 	struct bio *bio;
@@ -76,6 +86,10 @@ struct async_submit_bio {
 	struct btrfs_work work;
 };

+/*
+ * extents on the btree inode are pretty simple, there's one extent
+ * that covers the entire device
+ */
 struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
 				    size_t page_offset, u64 start, u64 len,
 				    int create)
@@ -151,6 +165,10 @@ void btrfs_csum_final(u32 crc, char *result)
 	*(__le32 *)result = ~cpu_to_le32(crc);
 }

+/*
+ * compute the csum for a btree block, and either verify it or write it
+ * into the csum field of the block.
+ */
 static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 			   int verify)
 {
@@ -204,6 +222,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 	return 0;
 }

+/*
+ * we can't consider a given block up to date unless the transid of the
+ * block matches the transid in the parent node's pointer.  This is how we
+ * detect blocks that either didn't get written at all or got written
+ * in the wrong place.
+ */
 static int verify_parent_transid(struct extent_io_tree *io_tree,
 				 struct extent_buffer *eb, u64 parent_transid)
 {
@@ -228,9 +252,12 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 	unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
 		      GFP_NOFS);
 	return ret;
-
 }

+/*
+ * helper to read a given tree block, doing retries as required when
+ * the checksums don't match and we have alternate mirrors to try.
+ */
 static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 					  struct extent_buffer *eb,
 					  u64 start, u64 parent_transid)
@@ -260,6 +287,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror
 	return -EIO;
 }

+/*
+ * checksum a dirty tree block before IO.  This has extra checks to make
+ * sure we only fill in the checksum field in the first page of a multi-page block
+ */
 int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 {
 	struct extent_io_tree *tree;

--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -914,6 +914,10 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 }
 EXPORT_SYMBOL(wait_on_extent_writeback);

+/*
+ * either insert or lock state struct between start and end use mask to tell
+ * us if waiting is desired.
+ */
 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
 {
 	int err;
@@ -982,6 +986,13 @@ int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 }
 EXPORT_SYMBOL(set_range_writeback);

+/*
+ * find the first offset in the io tree with 'bits' set. zero is
+ * returned if we find something, and *start_ret and *end_ret are
+ * set to reflect the state struct that was found.
+ *
+ * If nothing was found, 1 is returned, < 0 on error
+ */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
 			  u64 *start_ret, u64 *end_ret, int bits)
 {
@@ -1017,6 +1028,10 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
 }
 EXPORT_SYMBOL(find_first_extent_bit);

+/* find the first state struct with 'bits' set after 'start', and
+ * return it.  tree->lock must be held.  NULL will returned if
+ * nothing was found after 'start'
+ */
 struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
 						 u64 start, int bits)
 {
@@ -1046,8 +1061,14 @@ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
 }
 EXPORT_SYMBOL(find_first_extent_bit_state);

-u64 find_lock_delalloc_range(struct extent_io_tree *tree,
-			     u64 *start, u64 *end, u64 max_bytes)
+/*
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'.  start and end are used to return the range,
+ *
+ * 1 is returned if we find something, 0 if nothing was in the tree
+ */
+static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree,
+					     u64 *start, u64 *end, u64 max_bytes)
 {
 	struct rb_node *node;
 	struct extent_state *state;
@@ -1130,6 +1151,11 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
 	return found;
 }

+/*
+ * count the number of bytes in the tree that have a given bit(s)
+ * set.  This can be fairly slow, except for EXTENT_DIRTY which is
+ * cached.  The total number found is returned.
+ */
 u64 count_range_bits(struct extent_io_tree *tree,
 		     u64 *start, u64 search_end, u64 max_bytes,
 		     unsigned long bits)
@@ -1245,6 +1271,10 @@ int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
 }
 EXPORT_SYMBOL(unlock_range);

+/*
+ * set the private field for a given byte offset in the tree.  If there isn't
+ * an extent_state there already, this does nothing.
+ */
 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
 {
 	struct rb_node *node;

--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -114,6 +114,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
 	return NULL;
 }

+/*
+ * search through the tree for an extent_map with a given offset.  If
+ * it can't be found, try to find some neighboring extents
+ */
 static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
 				     struct rb_node **prev_ret,
 				     struct rb_node **next_ret)
@@ -160,6 +164,10 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
 	return NULL;
 }

+/*
+ * look for an offset in the tree, and if it can't be found, return
+ * the first offset we can find smaller than 'offset'.
+ */
 static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
 {
 	struct rb_node *prev;
@@ -170,6 +178,7 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
 	return ret;
 }

+/* check to see if two extent_map structs are adjacent and safe to merge */
 static int mergable_maps(struct extent_map *prev, struct extent_map *next)
 {
 	if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
@@ -250,6 +259,7 @@ int add_extent_mapping(struct extent_map_tree *tree,
 }
 EXPORT_SYMBOL(add_extent_mapping);

+/* simple helper to do math around the end of an extent, handling wrap */
 static u64 range_end(u64 start, u64 len)
 {
 	if (start + len < start)

--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -41,6 +41,9 @@
 #include "compat.h"


+/* simple helper to fault in pages and copy.  This should go away
+ * and be replaced with calls into generic code.
+ */
 static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
 					 int write_bytes,
 					 struct page **prepared_pages,
@@ -72,12 +75,19 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
 	return page_fault ? -EFAULT : 0;
 }

+/*
+ * unlocks pages after btrfs_file_write is done with them
+ */
 static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
 {
 	size_t i;
 	for (i = 0; i < num_pages; i++) {
 		if (!pages[i])
 			break;
+		/* page checked is some magic around finding pages that
+		 * have been modified without going through btrfs_set_page_dirty
+		 * clear it here
+		 */
 		ClearPageChecked(pages[i]);
 		unlock_page(pages[i]);
 		mark_page_accessed(pages[i]);
@@ -85,6 +95,10 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
 	}
 }

+/* this does all the hard work for inserting an inline extent into
+ * the btree.  Any existing inline extent is extended as required to make room,
+ * otherwise things are inserted as required into the btree
+ */
 static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root, struct inode *inode,
 				u64 offset, size_t size,
@@ -228,6 +242,14 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
 	return err;
 }

+/*
+ * after copy_from_user, pages need to be dirtied and we need to make
+ * sure holes are created between the current EOF and the start of
+ * any next extents (if required).
+ *
+ * this also makes the decision about creating an inline extent vs
+ * doing real data extents, marking pages dirty and delalloc as required.
+ */
 static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct file *file,
@@ -362,6 +384,10 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
 	return err;
 }

+/*
+ * this drops all the extents in the cache that intersect the range
+ * [start, end].  Existing extents are split as required.
+ */
 int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			    int skip_pinned)
 {
@@ -536,6 +562,9 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
+ *
+ * inline_limit is used to tell this code which offsets in the file to keep
+ * if they contain inline extents.
 */
 int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct inode *inode,
@@ -796,7 +825,9 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
 }

 /*
- * this gets pages into the page cache and locks them down
+ * this gets pages into the page cache and locks them down, it also properly
+ * waits for data=ordered extents to finish before allowing the pages to be
+ * modified.
 */
 static int noinline prepare_pages(struct btrfs_root *root, struct file *file,
 			 struct page **pages, size_t num_pages,
@@ -1034,6 +1065,17 @@ int btrfs_release_file(struct inode * inode, struct file * filp)
 	return 0;
 }

+/*
+ * fsync call for both files and directories.  This logs the inode into
+ * the tree log instead of forcing full commits whenever possible.
+ *
+ * It needs to call filemap_fdatawait so that all ordered extent updates are
+ * in the metadata btree are up to date for copying to the log.
+ *
+ * It drops the inode mutex before doing the tree log commit.  This is an
+ * important optimization for directories because holding the mutex prevents
+ * new operations on the dir while we write to disk.
+ */
 int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 {
 	struct inode *inode = dentry->d_inode;

--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -25,6 +25,15 @@
 #include "extent_io.h"
 #include "locking.h"

+/*
+ * locks the per buffer mutex in an extent buffer.  This uses adaptive locks
+ * and the spin is not tuned very extensively.  The spinning does make a big
+ * difference in almost every workload, but spinning for the right amount of
+ * time needs some help.
+ *
+ * In general, we want to spin as long as the lock holder is doing btree searches,
+ * and we should give up if they are in more expensive code.
+ */
 int btrfs_tree_lock(struct extent_buffer *eb)
 {
 	int i;
@@ -57,6 +66,10 @@ int btrfs_tree_locked(struct extent_buffer *eb)
 	return mutex_is_locked(&eb->mutex);
 }

+/*
+ * btrfs_search_slot uses this to decide if it should drop its locks
+ * before doing something expensive like allocating free blocks for cow.
+ */
 int btrfs_path_lock_waiting(struct btrfs_path *path, int level)
 {
 	int i;

--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -26,7 +26,6 @@
 #include "btrfs_inode.h"
 #include "extent_io.h"

-
 static u64 entry_end(struct btrfs_ordered_extent *entry)
 {
 	if (entry->file_offset + entry->len < entry->file_offset)
@@ -34,6 +33,9 @@ static u64 entry_end(struct btrfs_ordered_extent *entry)
 	return entry->file_offset + entry->len;
 }

+/* returns NULL if the insertion worked, or it returns the node it did find
+ * in the tree
+ */
 static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
 				   struct rb_node *node)
 {
@@ -58,6 +60,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
 	return NULL;
 }

+/*
+ * look for a given offset in the tree, and if it can't be found return the
+ * first lesser offset
+ */
 static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
 				     struct rb_node **prev_ret)
 {
@@ -108,6 +114,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
 	return NULL;
 }

+/*
+ * helper to check if a given offset is inside a given entry
+ */
 static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
 {
 	if (file_offset < entry->file_offset ||
@@ -116,6 +125,10 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
 	return 1;
 }

+/*
+ * look find the first ordered struct that has this offset, otherwise
+ * the first one less than this offset
+ */
 static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
 					  u64 file_offset)
 {
@@ -305,6 +318,10 @@ int btrfs_remove_ordered_extent(struct inode *inode,
 	return 0;
 }

+/*
+ * wait for all the ordered extents in a root.  This is done when balancing
+ * space between drives.
+ */
 int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
 {
 	struct list_head splice;

--- a/fs/btrfs/ref-cache.c
+++ b/fs/btrfs/ref-cache.c
@@ -21,6 +21,16 @@
 #include "ref-cache.h"
 #include "transaction.h"

+/*
+ * leaf refs are used to cache the information about which extents
+ * a given leaf has references on.  This allows us to process that leaf
+ * in btrfs_drop_snapshot without needing to read it back from disk.
+ */
+
+/*
+ * kmalloc a leaf reference struct and update the counters for the
+ * total ref cache size
+ */
 struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
 					    int nr_extents)
 {
@@ -40,6 +50,10 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
 	return ref;
 }

+/*
+ * free a leaf reference struct and update the counters for the
+ * total ref cache size
+ */
 void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
 {
 	if (!ref)
@@ -135,6 +149,10 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
 	return 0;
 }

+/*
+ * find the leaf ref for a given extent.  This returns the ref struct with
+ * a usage reference incremented
+ */
 struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
 					     u64 bytenr)
 {
@@ -160,6 +178,10 @@ struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
 	return NULL;
 }

+/*
+ * add a fully filled in leaf ref struct
+ * remove all the refs older than a given root generation
+ */
 int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
 		       int shared)
 {
@@ -184,6 +206,10 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
 	return ret;
 }

+/*
+ * remove a single leaf ref from the tree.  This drops the ref held by the tree
+ * only
+ */
 int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
 {
 	struct btrfs_leaf_ref_tree *tree;

--- a/fs/btrfs/ref-cache.h
+++ b/fs/btrfs/ref-cache.h
@@ -19,8 +19,11 @@
 #define __REFCACHE__

 struct btrfs_extent_info {
+	/* bytenr and num_bytes find the extent in the extent allocation tree */
 	u64 bytenr;
 	u64 num_bytes;
+
+	/* objectid and offset find the back reference for the file */
 	u64 objectid;
 	u64 offset;
 };

--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -22,8 +22,10 @@
 #include "print-tree.h"

 /*
- * returns 0 on finding something, 1 if no more roots are there
- * and < 0 on error
+ *  search forward for a root, starting with objectid 'search_start'
+ *  if a root key is found, the objectid we find is filled into 'found_objectid'
+ *  and 0 is returned.  < 0 is returned on error, 1 if there is nothing
+ *  left in the tree.
 */
 int btrfs_search_root(struct btrfs_root *root, u64 search_start,
 		      u64 *found_objectid)
@@ -66,6 +68,11 @@ int btrfs_search_root(struct btrfs_root *root, u64 search_start,
 	return ret;
 }

+/*
+ * lookup the root with the highest offset for a given objectid.  The key we do
+ * find is copied into 'key'.  If we find something return 0, otherwise 1, < 0
+ * on error.
+ */
 int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
 			struct btrfs_root_item *item, struct btrfs_key *key)
 {
@@ -104,6 +111,9 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
 	return ret;
 }

+/*
+ * copy the data in 'item' into the btree
+ */
 int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_key *key, struct btrfs_root_item
 		      *item)
@@ -147,6 +157,12 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
 	return ret;
 }

+/*
+ * at mount time we want to find all the old transaction snapshots that were in
+ * the process of being deleted if we crashed.  This is any root item with an offset
+ * lower than the latest root.  They need to be queued for deletion to finish
+ * what was happening when we crashed.
+ */
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
 			  struct btrfs_root *latest)
 {
@@ -227,6 +243,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
 	return ret;
 }

+/* drop the root item for 'key' from 'root' */
 int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_key *key)
 {

--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -17,6 +17,27 @@
 */

 #include <linux/highmem.h>
+
+/* this is some deeply nasty code.  ctree.h has a different
+ * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef
+ *
+ * The end result is that anyone who #includes ctree.h gets a
+ * declaration for the btrfs_set_foo functions and btrfs_foo functions
+ *
+ * This file declares the macros and then #includes ctree.h, which results
+ * in cpp creating the function here based on the template below.
+ *
+ * These setget functions do all the extent_buffer related mapping
+ * required to efficiently read and write specific fields in the extent
+ * buffers.  Every pointer to metadata items in btrfs is really just
+ * an unsigned long offset into the extent buffer which has been
+ * cast to a specific type.  This gives us all the gcc type checking.
+ *
+ * The extent buffer api is used to do all the kmapping and page
+ * spanning work required to get extent buffers in highmem and have
+ * a metadata blocksize different from the page size.
+ */
+
 #define BTRFS_SETGET_FUNCS(name, type, member, bits)			\
 u##bits btrfs_##name(struct extent_buffer *eb,				\
 				   type *s)				\

--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -519,6 +519,9 @@ static struct file_system_type btrfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };

+/*
+ * used by btrfsctl to scan devices when no FS is mounted
+ */
 static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 				unsigned long arg)
 {

--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -46,6 +46,9 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
 	}
 }

+/*
+ * either allocate a new transaction or hop into the existing one
+ */
 static noinline int join_transaction(struct btrfs_root *root)
 {
 	struct btrfs_transaction *cur_trans;
@@ -85,6 +88,12 @@ static noinline int join_transaction(struct btrfs_root *root)
 	return 0;
 }

+/*
+ * this does all the record keeping required to make sure that a
+ * reference counted root is properly recorded in a given transaction.
+ * This is required to make sure the old root from before we joined the transaction
+ * is deleted when the transaction commits
+ */
 noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
 {
 	struct btrfs_dirty_root *dirty;
@@ -127,6 +136,10 @@ noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
 	return 0;
 }

+/* wait for commit against the current transaction to become unblocked
+ * when this is done, it is safe to start a new transaction, but the current
+ * transaction might not be fully on disk.
+ */
 static void wait_current_trans(struct btrfs_root *root)
 {
 	struct btrfs_transaction *cur_trans;
@@ -198,7 +211,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
 	return start_transaction(r, num_blocks, 2);
 }

-
+/* wait for a transaction commit to be fully complete */
 static noinline int wait_for_commit(struct btrfs_root *root,
 				    struct btrfs_transaction *commit)
 {
@@ -218,6 +231,10 @@ static noinline int wait_for_commit(struct btrfs_root *root,
 	return 0;
 }

+/*
+ * rate limit against the drop_snapshot code.  This helps to slow down new operations
+ * if the drop_snapshot code isn't able to keep up.
+ */
 static void throttle_on_drops(struct btrfs_root *root)
 {
 	struct btrfs_fs_info *info = root->fs_info;
@@ -302,7 +319,11 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 	return __btrfs_end_transaction(trans, root, 1);
 }

-
+/*
+ * when btree blocks are allocated, they have some corresponding bits set for
+ * them in one of two extent_io trees.  This is used to make sure all of
+ * those extents are on disk for transaction or log commit
+ */
 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
 					struct extent_io_tree *dirty_pages)
 {
@@ -393,6 +414,16 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 					   &trans->transaction->dirty_pages);
 }

+/*
+ * this is used to update the root pointer in the tree of tree roots.
+ *
+ * But, in the case of the extent allocation tree, updating the root
+ * pointer may allocate blocks which may change the root of the extent
+ * allocation tree.
+ *
+ * So, this loops and repeats and makes sure the cowonly root didn't
+ * change while the root pointer was being updated in the metadata.
+ */
 static int update_cowonly_root(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root)
 {
@@ -418,6 +449,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 	return 0;
 }

+/*
+ * update all the cowonly tree roots on disk
+ */
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root)
 {
@@ -433,6 +467,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
 	return 0;
 }

+/*
+ * dead roots are old snapshots that need to be deleted.  This allocates
+ * a dirty root struct and adds it into the list of dead roots that need to
+ * be deleted
+ */
 int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
 {
 	struct btrfs_dirty_root *dirty;
@@ -449,6 +488,12 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
 	return 0;
 }

+/*
+ * at transaction commit time we need to schedule the old roots for
+ * deletion via btrfs_drop_snapshot.  This runs through all the
+ * reference counted roots that were modified in the current
+ * transaction and puts them into the drop list
+ */
 static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
 				    struct radix_tree_root *radix,
 				    struct list_head *list)
@@ -541,6 +586,10 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
 	return err;
 }

+/*
+ * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
+ * otherwise every leaf in the btree is read and defragged.
+ */
 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
 {
 	struct btrfs_fs_info *info = root->fs_info;
@@ -570,6 +619,10 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
 	return 0;
 }

+/*
+ * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
+ * all of them
+ */
 static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
 				     struct list_head *list)
 {
@@ -664,6 +717,10 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
 	return ret;
 }

+/*
+ * new snapshots need to be created at a very specific time in the
+ * transaction commit.  This does the actual creation
+ */
 static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 				   struct btrfs_fs_info *fs_info,
 				   struct btrfs_pending_snapshot *pending)
@@ -734,6 +791,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	return ret;
 }

+/*
+ * create all the snapshots we've scheduled for creation
+ */
 static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
 					     struct btrfs_fs_info *fs_info)
 {
@@ -944,6 +1004,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	return ret;
 }

+/*
+ * interface function to delete all the snapshots we have scheduled for deletion
+ */
 int btrfs_clean_old_snapshots(struct btrfs_root *root)
 {
 	struct list_head dirty_roots;

--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -23,6 +23,10 @@
 #include "transaction.h"
 #include "locking.h"

+/* defrag all the leaves in a given btree.  If cache_only == 1, don't read things
+ * from disk, otherwise read all the leaves and try to get key order to
+ * better reflect disk order
+ */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, int cache_only)
 {