Skip to content

Commit a5ed918

Browse files
osandov authored and masoncl committed
Btrfs: implement the free space B-tree
The free space cache has turned out to be a scalability bottleneck on large, busy filesystems. When the cache for a lot of block groups needs to be written out, we can get extremely long commit times; if this happens in the critical section, things are especially bad because we block new transactions from happening.

The main problem with the free space cache is that it has to be written out in its entirety and is managed in an ad hoc fashion. Using a B-tree to store free space fixes this: updates can be done as needed and we get all of the benefits of using a B-tree: checksumming, RAID handling, well-understood behavior.

With the free space tree, we get commit times that are about the same as the no cache case with load times slower than the free space cache case but still much faster than the no cache case. Free space is represented with extents until it becomes more space-efficient to use bitmaps, giving us similar space overhead to the free space cache.

The operations on the free space tree are: adding and removing free space, handling the creation and deletion of block groups, and loading the free space for a block group. We can also create the free space tree by walking the extent tree and clear the free space tree.

Signed-off-by: Omar Sandoval <[email protected]>
Signed-off-by: Chris Mason <[email protected]>
1 parent 208acb8 commit a5ed918

File tree

5 files changed

+1686
-4
lines changed

5 files changed

+1686
-4
lines changed

fs/btrfs/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
99
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
1010
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
1111
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
12-
uuid-tree.o props.o hash.o
12+
uuid-tree.o props.o hash.o free-space-tree.o
1313

1414
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
1515
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

fs/btrfs/ctree.h

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1302,8 +1302,20 @@ struct btrfs_block_group_cache {
13021302
u64 delalloc_bytes;
13031303
u64 bytes_super;
13041304
u64 flags;
1305-
u64 sectorsize;
13061305
u64 cache_generation;
1306+
u32 sectorsize;
1307+
1308+
/*
1309+
* If the free space extent count exceeds this number, convert the block
1310+
* group to bitmaps.
1311+
*/
1312+
u32 bitmap_high_thresh;
1313+
1314+
/*
1315+
* If the free space extent count drops below this number, convert the
1316+
* block group back to extents.
1317+
*/
1318+
u32 bitmap_low_thresh;
13071319

13081320
/*
13091321
* It is just used for the delayed data space allocation because
@@ -1359,6 +1371,15 @@ struct btrfs_block_group_cache {
13591371
struct list_head io_list;
13601372

13611373
struct btrfs_io_ctl io_ctl;
1374+
1375+
/* Lock for free space tree operations. */
1376+
struct mutex free_space_lock;
1377+
1378+
/*
1379+
* Does the block group need to be added to the free space tree?
1380+
* Protected by free_space_lock.
1381+
*/
1382+
int needs_free_space;
13621383
};
13631384

13641385
/* delayed seq elem */
@@ -1410,6 +1431,7 @@ struct btrfs_fs_info {
14101431
struct btrfs_root *csum_root;
14111432
struct btrfs_root *quota_root;
14121433
struct btrfs_root *uuid_root;
1434+
struct btrfs_root *free_space_root;
14131435

14141436
/* the log root tree is a directory of all the other log roots */
14151437
struct btrfs_root *log_root_tree;
@@ -3555,6 +3577,9 @@ void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
35553577
void check_system_chunk(struct btrfs_trans_handle *trans,
35563578
struct btrfs_root *root,
35573579
const u64 type);
3580+
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
3581+
struct btrfs_fs_info *info, u64 start, u64 end);
3582+
35583583
/* ctree.c */
35593584
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
35603585
int level, int *slot);

fs/btrfs/extent-tree.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -337,8 +337,8 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
337337
* we need to check the pinned_extents for any extents that can't be used yet
338338
* since their free space will be released as soon as the transaction commits.
339339
*/
340-
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
341-
struct btrfs_fs_info *info, u64 start, u64 end)
340+
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
341+
struct btrfs_fs_info *info, u64 start, u64 end)
342342
{
343343
u64 extent_start, extent_end, size, total_added = 0;
344344
int ret;
@@ -9381,6 +9381,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
93819381
INIT_LIST_HEAD(&cache->io_list);
93829382
btrfs_init_free_space_ctl(cache);
93839383
atomic_set(&cache->trimming, 0);
9384+
mutex_init(&cache->free_space_lock);
93849385

93859386
return cache;
93869387
}

0 commit comments

Comments
 (0)