Skip to content

Commit 08b9703

Browse files
adam900710kdave
authored andcommitted
btrfs: introduce btrfs_bio::async_csum
[ENHANCEMENT] Btrfs currently calculates its data checksum and only then submits the bio. But after commit 968f19c ("btrfs: always fallback to buffered write if the inode requires checksum"), any write with data checksum will fall back to buffered IO, meaning the content will not change during writeback. This means it is safe to calculate the data checksum and submit the bio in parallel, and we only need the following new behaviors: - Wait for the csum generation to finish before calling btrfs_bio::end_io(). Otherwise the csum generation worker can hit a use-after-free. - Save the current bi_iter for csum_one_bio(). As the submission part can advance btrfs_bio::bio.bi_iter, if it is not saved csum_one_bio() may get an empty bi_iter and not generate any checksum. Unfortunately this means we have to increase the size of btrfs_bio by 16 bytes. As usual, such a new feature is hidden behind the experimental flag. [THEORETICAL ANALYSIS] Consider the following theoretical hardware performance, which should be more or less close to modern mainstream hardware: Memory bandwidth: 50GiB/s CRC32C bandwidth: 45GiB/s SSD bandwidth: 8GiB/s Then the btrfs write bandwidth with data checksum before the patch would be: 1 / ( 1 / 50 + 1 / 45 + 1 / 8) = 5.98 GiB/s After the patch, the checksum calculation and the SSD write overlap, so only the slower of the two counts, and the bandwidth would be: 1 / ( 1 / 50 + max( 1 / 45, 1 / 8)) = 6.90 GiB/s The difference would be a 15.32 % improvement. [REAL WORLD BENCHMARK] I'm using a Zen5 (HX 370) as the host, the VM has 4GiB memory, 10 vCPUs, the storage is backed by a PCIE gen3 x4 NVME SSD. The test is a direct IO write, with 1MiB block size, writing 7GiB of data into a btrfs mount with data checksum. Thus the direct write will fall back to a buffered one: Vanilla Datasum: 1619.97 MiB/s Patched Datasum: 1792.26 MiB/s Diff +10.6 % In my case, the bottleneck is the storage, thus the improvement does not reach the theoretical one, but there is still some observable improvement. Signed-off-by: Qu Wenruo <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 20ae694 commit 08b9703

File tree

4 files changed

+67
-23
lines changed

4 files changed

+67
-23
lines changed

fs/btrfs/bio.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
105105
/* Make sure we're already in task context. */
106106
ASSERT(in_task());
107107

108+
if (bbio->async_csum)
109+
wait_for_completion(&bbio->csum_done);
110+
108111
bbio->bio.bi_status = status;
109112
if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
110113
struct btrfs_bio *orig_bbio = bbio->private;
@@ -538,7 +541,11 @@ static int btrfs_bio_csum(struct btrfs_bio *bbio)
538541
{
539542
if (bbio->bio.bi_opf & REQ_META)
540543
return btree_csum_one_bio(bbio);
541-
return btrfs_csum_one_bio(bbio);
544+
#ifdef CONFIG_BTRFS_EXPERIMENTAL
545+
return btrfs_csum_one_bio(bbio, true);
546+
#else
547+
return btrfs_csum_one_bio(bbio, false);
548+
#endif
542549
}
543550

544551
/*
@@ -617,10 +624,14 @@ static bool should_async_write(struct btrfs_bio *bbio)
617624
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
618625
enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
619626

620-
if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
621-
return false;
622-
623-
auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
627+
if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_ON)
628+
return true;
629+
/*
630+
* Write bios will calculate checksum and submit bio at the same time.
631+
* Unless explicitly required don't offload serial csum calculate and bio
632+
* submit into a workqueue.
633+
*/
634+
return false;
624635
#endif
625636

626637
/* Submit synchronously if the checksum implementation is fast. */

fs/btrfs/bio.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ struct btrfs_bio {
5656
struct {
5757
struct btrfs_ordered_extent *ordered;
5858
struct btrfs_ordered_sum *sums;
59+
struct work_struct csum_work;
60+
struct completion csum_done;
61+
struct bvec_iter csum_saved_iter;
5962
u64 orig_physical;
6063
};
6164

@@ -83,6 +86,10 @@ struct btrfs_bio {
8386
* scrub bios.
8487
*/
8588
bool is_scrub;
89+
90+
/* Whether the csum generation for data write is async. */
91+
bool async_csum;
92+
8693
/*
8794
* This member must come last, bio_alloc_bioset will allocate enough
8895
* bytes for entire btrfs_bio but relies on bio being last.

fs/btrfs/file-item.c

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "fs.h"
1919
#include "accessors.h"
2020
#include "file-item.h"
21+
#include "volumes.h"
2122

2223
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
2324
sizeof(struct btrfs_item) * 2) / \
@@ -764,21 +765,46 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
764765
return ret;
765766
}
766767

768+
static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src)
769+
{
770+
struct btrfs_inode *inode = bbio->inode;
771+
struct btrfs_fs_info *fs_info = inode->root->fs_info;
772+
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
773+
struct bio *bio = &bbio->bio;
774+
struct btrfs_ordered_sum *sums = bbio->sums;
775+
struct bvec_iter iter = *src;
776+
phys_addr_t paddr;
777+
const u32 blocksize = fs_info->sectorsize;
778+
int index = 0;
779+
780+
shash->tfm = fs_info->csum_shash;
781+
782+
btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) {
783+
btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index);
784+
index += fs_info->csum_size;
785+
}
786+
}
787+
788+
static void csum_one_bio_work(struct work_struct *work)
789+
{
790+
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, csum_work);
791+
792+
ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
793+
ASSERT(bbio->async_csum == true);
794+
csum_one_bio(bbio, &bbio->csum_saved_iter);
795+
complete(&bbio->csum_done);
796+
}
797+
767798
/*
768799
* Calculate checksums of the data contained inside a bio.
769800
*/
770-
int btrfs_csum_one_bio(struct btrfs_bio *bbio)
801+
int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async)
771802
{
772803
struct btrfs_ordered_extent *ordered = bbio->ordered;
773804
struct btrfs_inode *inode = bbio->inode;
774805
struct btrfs_fs_info *fs_info = inode->root->fs_info;
775-
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
776806
struct bio *bio = &bbio->bio;
777807
struct btrfs_ordered_sum *sums;
778-
struct bvec_iter iter = bio->bi_iter;
779-
phys_addr_t paddr;
780-
const u32 blocksize = fs_info->sectorsize;
781-
int index;
782808
unsigned nofs_flag;
783809

784810
nofs_flag = memalloc_nofs_save();
@@ -789,21 +815,21 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
789815
if (!sums)
790816
return -ENOMEM;
791817

818+
sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
792819
sums->len = bio->bi_iter.bi_size;
793820
INIT_LIST_HEAD(&sums->list);
794-
795-
sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
796-
index = 0;
797-
798-
shash->tfm = fs_info->csum_shash;
799-
800-
btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) {
801-
btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index);
802-
index += fs_info->csum_size;
803-
}
804-
805821
bbio->sums = sums;
806822
btrfs_add_ordered_sum(ordered, sums);
823+
824+
if (!async) {
825+
csum_one_bio(bbio, &bbio->bio.bi_iter);
826+
return 0;
827+
}
828+
init_completion(&bbio->csum_done);
829+
bbio->async_csum = true;
830+
bbio->csum_saved_iter = bbio->bio.bi_iter;
831+
INIT_WORK(&bbio->csum_work, csum_one_bio_work);
832+
schedule_work(&bbio->csum_work);
807833
return 0;
808834
}
809835

fs/btrfs/file-item.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
6464
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
6565
struct btrfs_root *root,
6666
struct btrfs_ordered_sum *sums);
67-
int btrfs_csum_one_bio(struct btrfs_bio *bbio);
67+
int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async);
6868
int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
6969
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
7070
struct list_head *list, int search_commit,

0 commit comments

Comments
 (0)