2828#include <linux/pipe_fs_i.h>
2929#include <linux/mpage.h>
3030#include <linux/quotaops.h>
31+ #include <linux/blkdev.h>
3132
3233#include <cluster/masklog.h>
3334
4748#include "ocfs2_trace.h"
4849
4950#include "buffer_head_io.h"
51+ #include "dir.h"
52+ #include "namei.h"
53+ #include "sysfile.h"
5054
5155static int ocfs2_symlink_get_block (struct inode * inode , sector_t iblock ,
5256 struct buffer_head * bh_result , int create )
@@ -597,13 +601,194 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
597601 return try_to_free_buffers (page );
598602}
599603
604+ static int ocfs2_is_overwrite (struct ocfs2_super * osb ,
605+ struct inode * inode , loff_t offset )
606+ {
607+ int ret = 0 ;
608+ u32 v_cpos = 0 ;
609+ u32 p_cpos = 0 ;
610+ unsigned int num_clusters = 0 ;
611+ unsigned int ext_flags = 0 ;
612+
613+ v_cpos = ocfs2_bytes_to_clusters (osb -> sb , offset );
614+ ret = ocfs2_get_clusters (inode , v_cpos , & p_cpos ,
615+ & num_clusters , & ext_flags );
616+ if (ret < 0 ) {
617+ mlog_errno (ret );
618+ return ret ;
619+ }
620+
621+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN ))
622+ return 1 ;
623+
624+ return 0 ;
625+ }
626+
627+ static ssize_t ocfs2_direct_IO_write (struct kiocb * iocb ,
628+ struct iov_iter * iter ,
629+ loff_t offset )
630+ {
631+ ssize_t ret = 0 ;
632+ ssize_t written = 0 ;
633+ bool orphaned = false;
634+ int is_overwrite = 0 ;
635+ struct file * file = iocb -> ki_filp ;
636+ struct inode * inode = file_inode (file )-> i_mapping -> host ;
637+ struct ocfs2_super * osb = OCFS2_SB (inode -> i_sb );
638+ struct buffer_head * di_bh = NULL ;
639+ size_t count = iter -> count ;
640+ journal_t * journal = osb -> journal -> j_journal ;
641+ u32 zero_len ;
642+ int cluster_align ;
643+ loff_t final_size = offset + count ;
644+ int append_write = offset >= i_size_read (inode ) ? 1 : 0 ;
645+ unsigned int num_clusters = 0 ;
646+ unsigned int ext_flags = 0 ;
647+
648+ {
649+ u64 o = offset ;
650+
651+ zero_len = do_div (o , 1 << osb -> s_clustersize_bits );
652+ cluster_align = !zero_len ;
653+ }
654+
655+ /*
656+ * when final_size > inode->i_size, inode->i_size will be
657+ * updated after direct write, so add the inode to orphan
658+ * dir first.
659+ */
660+ if (final_size > i_size_read (inode )) {
661+ ret = ocfs2_add_inode_to_orphan (osb , inode );
662+ if (ret < 0 ) {
663+ mlog_errno (ret );
664+ goto out ;
665+ }
666+ orphaned = true;
667+ }
668+
669+ if (append_write ) {
670+ ret = ocfs2_inode_lock (inode , & di_bh , 1 );
671+ if (ret < 0 ) {
672+ mlog_errno (ret );
673+ goto clean_orphan ;
674+ }
675+
676+ if (ocfs2_sparse_alloc (OCFS2_SB (inode -> i_sb )))
677+ ret = ocfs2_zero_extend (inode , di_bh , offset );
678+ else
679+ ret = ocfs2_extend_no_holes (inode , di_bh , offset ,
680+ offset );
681+ if (ret < 0 ) {
682+ mlog_errno (ret );
683+ ocfs2_inode_unlock (inode , 1 );
684+ brelse (di_bh );
685+ goto clean_orphan ;
686+ }
687+
688+ is_overwrite = ocfs2_is_overwrite (osb , inode , offset );
689+ if (is_overwrite < 0 ) {
690+ mlog_errno (is_overwrite );
691+ ocfs2_inode_unlock (inode , 1 );
692+ brelse (di_bh );
693+ goto clean_orphan ;
694+ }
695+
696+ ocfs2_inode_unlock (inode , 1 );
697+ brelse (di_bh );
698+ di_bh = NULL ;
699+ }
700+
701+ written = __blockdev_direct_IO (WRITE , iocb , inode , inode -> i_sb -> s_bdev ,
702+ iter , offset ,
703+ ocfs2_direct_IO_get_blocks ,
704+ ocfs2_dio_end_io , NULL , 0 );
705+ if (unlikely (written < 0 )) {
706+ loff_t i_size = i_size_read (inode );
707+
708+ if (offset + count > i_size ) {
709+ ret = ocfs2_inode_lock (inode , & di_bh , 1 );
710+ if (ret < 0 ) {
711+ mlog_errno (ret );
712+ goto clean_orphan ;
713+ }
714+
715+ if (i_size == i_size_read (inode )) {
716+ ret = ocfs2_truncate_file (inode , di_bh ,
717+ i_size );
718+ if (ret < 0 ) {
719+ if (ret != - ENOSPC )
720+ mlog_errno (ret );
721+
722+ ocfs2_inode_unlock (inode , 1 );
723+ brelse (di_bh );
724+ goto clean_orphan ;
725+ }
726+ }
727+
728+ ocfs2_inode_unlock (inode , 1 );
729+ brelse (di_bh );
730+
731+ ret = jbd2_journal_force_commit (journal );
732+ if (ret < 0 )
733+ mlog_errno (ret );
734+ }
735+ } else if (written < 0 && append_write && !is_overwrite &&
736+ !cluster_align ) {
737+ u32 p_cpos = 0 ;
738+ u32 v_cpos = ocfs2_bytes_to_clusters (osb -> sb , offset );
739+
740+ ret = ocfs2_get_clusters (inode , v_cpos , & p_cpos ,
741+ & num_clusters , & ext_flags );
742+ if (ret < 0 ) {
743+ mlog_errno (ret );
744+ goto clean_orphan ;
745+ }
746+
747+ BUG_ON (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN ));
748+
749+ ret = blkdev_issue_zeroout (osb -> sb -> s_bdev ,
750+ p_cpos << (osb -> s_clustersize_bits - 9 ),
751+ zero_len >> 9 , GFP_KERNEL , false);
752+ if (ret < 0 )
753+ mlog_errno (ret );
754+ }
755+
756+ clean_orphan :
757+ if (orphaned ) {
758+ int tmp_ret ;
759+ int update_isize = written > 0 ? 1 : 0 ;
760+ loff_t end = update_isize ? offset + written : 0 ;
761+
762+ tmp_ret = ocfs2_del_inode_from_orphan (osb , inode ,
763+ update_isize , end );
764+ if (tmp_ret < 0 ) {
765+ ret = tmp_ret ;
766+ goto out ;
767+ }
768+
769+ tmp_ret = jbd2_journal_force_commit (journal );
770+ if (tmp_ret < 0 ) {
771+ ret = tmp_ret ;
772+ mlog_errno (tmp_ret );
773+ }
774+ }
775+
776+ out :
777+ if (ret >= 0 )
778+ ret = written ;
779+ return ret ;
780+ }
781+
600782static ssize_t ocfs2_direct_IO (int rw ,
601783 struct kiocb * iocb ,
602784 struct iov_iter * iter ,
603785 loff_t offset )
604786{
605787 struct file * file = iocb -> ki_filp ;
606788 struct inode * inode = file_inode (file )-> i_mapping -> host ;
789+ struct ocfs2_super * osb = OCFS2_SB (inode -> i_sb );
790+ int full_coherency = !(osb -> s_mount_opt &
791+ OCFS2_MOUNT_COHERENCY_BUFFERED );
607792
608793 /*
609794 * Fallback to buffered I/O if we see an inode without
@@ -612,14 +797,20 @@ static ssize_t ocfs2_direct_IO(int rw,
612797 if (OCFS2_I (inode )-> ip_dyn_features & OCFS2_INLINE_DATA_FL )
613798 return 0 ;
614799
615- /* Fallback to buffered I/O if we are appending. */
616- if (i_size_read (inode ) <= offset )
800+ /* Fallback to buffered I/O if we are appending and
801+ * concurrent O_DIRECT writes are allowed.
802+ */
803+ if (i_size_read (inode ) <= offset && !full_coherency )
617804 return 0 ;
618805
619- return __blockdev_direct_IO (rw , iocb , inode , inode -> i_sb -> s_bdev ,
806+ if (rw == READ )
807+ return __blockdev_direct_IO (rw , iocb , inode ,
808+ inode -> i_sb -> s_bdev ,
620809 iter , offset ,
621810 ocfs2_direct_IO_get_blocks ,
622811 ocfs2_dio_end_io , NULL , 0 );
812+ else
813+ return ocfs2_direct_IO_write (iocb , iter , offset );
623814}
624815
625816static void ocfs2_figure_cluster_boundaries (struct ocfs2_super * osb ,
0 commit comments