@@ -53,6 +53,22 @@ void zlib_free_workspace(struct list_head *ws)
5353 kfree (workspace );
5454}
5555
56+ /*
57+ * For s390 hardware acceleration, the buffer size should be at least
58+ * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
59+ *
60+ * But if bs > ps we can have large enough folios that meet the s390 hardware
61+ * handling.
62+ */
63+ static bool need_special_buffer (struct btrfs_fs_info * fs_info )
64+ {
65+ if (!zlib_deflate_dfltcc_enabled ())
66+ return false;
67+ if (btrfs_min_folio_size (fs_info ) >= ZLIB_DFLTCC_BUF_SIZE )
68+ return false;
69+ return true;
70+ }
71+
5672struct list_head * zlib_alloc_workspace (struct btrfs_fs_info * fs_info , unsigned int level )
5773{
5874 const u32 blocksize = fs_info -> sectorsize ;
@@ -68,11 +84,7 @@ struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned i
6884 workspace -> strm .workspace = kvzalloc (workspacesize , GFP_KERNEL | __GFP_NOWARN );
6985 workspace -> level = level ;
7086 workspace -> buf = NULL ;
71- /*
72- * In case of s390 zlib hardware support, allocate lager workspace
73- * buffer. If allocator fails, fall back to a single page buffer.
74- */
75- if (zlib_deflate_dfltcc_enabled ()) {
87+ if (need_special_buffer (fs_info )) {
7688 workspace -> buf = kmalloc (ZLIB_DFLTCC_BUF_SIZE ,
7789 __GFP_NOMEMALLOC | __GFP_NORETRY |
7890 __GFP_NOWARN | GFP_NOIO );
@@ -139,6 +151,8 @@ int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
139151 struct btrfs_fs_info * fs_info = inode -> root -> fs_info ;
140152 struct workspace * workspace = list_entry (ws , struct workspace , list );
141153 struct address_space * mapping = inode -> vfs_inode .i_mapping ;
154+ const u32 min_folio_shift = PAGE_SHIFT + fs_info -> block_min_order ;
155+ const u32 min_folio_size = btrfs_min_folio_size (fs_info );
142156 int ret ;
143157 char * data_in = NULL ;
144158 char * cfolio_out ;
@@ -147,7 +161,7 @@ int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
147161 struct folio * out_folio = NULL ;
148162 unsigned long len = * total_out ;
149163 unsigned long nr_dest_folios = * out_folios ;
150- const unsigned long max_out = nr_dest_folios * PAGE_SIZE ;
164+ const unsigned long max_out = nr_dest_folios << min_folio_shift ;
151165 const u32 blocksize = fs_info -> sectorsize ;
152166 const u64 orig_end = start + len ;
153167
@@ -179,7 +193,7 @@ int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
179193 workspace -> strm .next_in = workspace -> buf ;
180194 workspace -> strm .avail_in = 0 ;
181195 workspace -> strm .next_out = cfolio_out ;
182- workspace -> strm .avail_out = PAGE_SIZE ;
196+ workspace -> strm .avail_out = min_folio_size ;
183197
184198 while (workspace -> strm .total_in < len ) {
185199 /*
@@ -191,10 +205,11 @@ int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
191205 unsigned int copy_length = min (bytes_left , workspace -> buf_size );
192206
193207 /*
194- * This can only happen when hardware zlib compression is
195- * enabled.
208+ * For s390 hardware accelerated zlib, when our folio is smaller
209+ * than copy_length, we need to fill the buffer first so that
210+ * we can take full advantage of hardware acceleration.
196211 */
197- if (copy_length > PAGE_SIZE ) {
212+ if (need_special_buffer ( fs_info ) ) {
198213 ret = copy_data_into_buffer (mapping , workspace ,
199214 start , copy_length );
200215 if (ret < 0 )
@@ -258,7 +273,7 @@ int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
258273 cfolio_out = folio_address (out_folio );
259274 folios [nr_folios ] = out_folio ;
260275 nr_folios ++ ;
261- workspace -> strm .avail_out = PAGE_SIZE ;
276+ workspace -> strm .avail_out = min_folio_size ;
262277 workspace -> strm .next_out = cfolio_out ;
263278 }
264279 /* we're all done */
@@ -294,7 +309,7 @@ int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
294309 cfolio_out = folio_address (out_folio );
295310 folios [nr_folios ] = out_folio ;
296311 nr_folios ++ ;
297- workspace -> strm .avail_out = PAGE_SIZE ;
312+ workspace -> strm .avail_out = min_folio_size ;
298313 workspace -> strm .next_out = cfolio_out ;
299314 }
300315 }
@@ -320,20 +335,22 @@ int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
320335
321336int zlib_decompress_bio (struct list_head * ws , struct compressed_bio * cb )
322337{
338+ struct btrfs_fs_info * fs_info = cb_to_fs_info (cb );
323339 struct workspace * workspace = list_entry (ws , struct workspace , list );
340+ const u32 min_folio_size = btrfs_min_folio_size (fs_info );
324341 int ret = 0 , ret2 ;
325342 int wbits = MAX_WBITS ;
326343 char * data_in ;
327344 size_t total_out = 0 ;
328345 unsigned long folio_in_index = 0 ;
329346 size_t srclen = cb -> compressed_len ;
330- unsigned long total_folios_in = DIV_ROUND_UP (srclen , PAGE_SIZE );
347+ unsigned long total_folios_in = DIV_ROUND_UP (srclen , min_folio_size );
331348 unsigned long buf_start ;
332349 struct folio * * folios_in = cb -> compressed_folios ;
333350
334351 data_in = kmap_local_folio (folios_in [folio_in_index ], 0 );
335352 workspace -> strm .next_in = data_in ;
336- workspace -> strm .avail_in = min_t (size_t , srclen , PAGE_SIZE );
353+ workspace -> strm .avail_in = min_t (size_t , srclen , min_folio_size );
337354 workspace -> strm .total_in = 0 ;
338355
339356 workspace -> strm .total_out = 0 ;
@@ -394,7 +411,7 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
394411 data_in = kmap_local_folio (folios_in [folio_in_index ], 0 );
395412 workspace -> strm .next_in = data_in ;
396413 tmp = srclen - workspace -> strm .total_in ;
397- workspace -> strm .avail_in = min (tmp , PAGE_SIZE );
414+ workspace -> strm .avail_in = min (tmp , min_folio_size );
398415 }
399416 }
400417 if (unlikely (ret != Z_STREAM_END )) {
0 commit comments