diff -drupN a/fs/f2fs/file.c b/fs/f2fs/file.c --- a/fs/f2fs/file.c 2018-08-06 17:23:04.000000000 +0300 +++ b/fs/f2fs/file.c 2022-06-12 05:28:14.000000000 +0300 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,19 @@ #include "trace.h" #include +static int f2fs_filemap_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + int err; + + down_read(&F2FS_I(inode)->i_mmap_sem); + err = filemap_fault(vma, vmf); + up_read(&F2FS_I(inode)->i_mmap_sem); + + return err; +} + static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -41,6 +55,11 @@ static int f2fs_vm_page_mkwrite(struct v struct dnode_of_data dn; int err; + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto err; + } + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -59,13 +78,14 @@ static int f2fs_vm_page_mkwrite(struct v f2fs_balance_fs(sbi, dn.node_changed); file_update_time(vma->vm_file); + down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || page_offset(page) > i_size_read(inode) || !PageUptodate(page))) { unlock_page(page); err = -EFAULT; - goto out; + goto out_sem; } /* @@ -85,25 +105,28 @@ static int f2fs_vm_page_mkwrite(struct v if (!PageUptodate(page)) SetPageUptodate(page); + f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + trace_f2fs_vm_page_mkwrite(page, DATA); mapped: /* fill the page */ f2fs_wait_on_page_writeback(page, DATA, false); - /* wait for GCed encrypted page writeback */ - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) - f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); + /* wait for GCed page writeback via META_MAPPING */ + if (f2fs_post_read_required(inode)) + f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); - /* if gced page is attached, don't write to cold segment */ - clear_cold_data(page); +out_sem: + up_read(&F2FS_I(inode)->i_mmap_sem); out: sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); +err: return block_page_mkwrite_return(err); } static const struct vm_operations_struct f2fs_file_vm_ops = { - .fault = filemap_fault, + .fault = f2fs_filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = f2fs_vm_page_mkwrite, }; @@ -118,39 +141,38 @@ static int get_parent_ino(struct inode * if (!dentry) return 0; - if (update_dent_inode(inode, inode, &dentry->d_name)) { - dput(dentry); - return 0; - } - *pino = parent_ino(dentry); dput(dentry); return 1; } -static inline bool need_do_checkpoint(struct inode *inode) +static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool need_cp = false; + enum cp_reason_type cp_reason = CP_NO_NEEDED; - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) - need_cp = true; + if (!S_ISREG(inode->i_mode)) + cp_reason = CP_NON_REGULAR; + else if (inode->i_nlink != 1) + cp_reason = CP_HARDLINK; else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) - need_cp = true; + cp_reason = CP_SB_NEED_CP; else if (file_wrong_pino(inode)) - need_cp = true; + cp_reason = CP_WRONG_PINO; else if (!space_for_roll_forward(sbi)) - need_cp = true; + cp_reason = CP_NO_SPC_ROLL; else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) - need_cp = true; - else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) - need_cp = true; + cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) - need_cp = true; - else if (sbi->active_logs == 2) - need_cp = true; + cp_reason = CP_FASTBOOT_MODE; + else if (F2FS_OPTION(sbi).active_logs == 2) + cp_reason = CP_SPEC_LOG_NUM; + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && + need_dentry_mark(sbi, inode->i_ino) && + exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) + cp_reason = CP_RECOVER_DIR; - return need_cp; + return cp_reason; } static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) @@ -170,7 +192,6 @@ static void try_to_fix_pino(struct inode nid_t pino; down_write(&fi->i_sem); - fi->xattr_ver = 0; if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { f2fs_i_pino_write(inode, pino); @@ -186,7 +207,7 @@ static int f2fs_do_sync_file(struct file struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t ino = inode->i_ino; int ret = 0; - bool need_cp = false; + enum cp_reason_type cp_reason = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, @@ -205,12 +226,12 @@ static int f2fs_do_sync_file(struct file clear_inode_flag(inode, FI_NEED_IPU); if (ret) { - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); return ret; } /* if the inode is dirty, let's recover all the time */ - if (!datasync && !f2fs_skip_inode_update(inode)) { + if (!f2fs_skip_inode_update(inode, datasync)) { f2fs_write_inode(inode, NULL); goto go_write; } @@ -236,10 +257,10 @@ go_write: * sudden-power-off. */ down_read(&F2FS_I(inode)->i_sem); - need_cp = need_do_checkpoint(inode); + cp_reason = need_do_checkpoint(inode); up_read(&F2FS_I(inode)->i_sem); - if (need_cp) { + if (cp_reason) { /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); @@ -264,31 +285,47 @@ sync_nodes: } if (need_inode_block_update(sbi, ino)) { - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, true); f2fs_write_inode(inode, NULL); goto sync_nodes; } - ret = wait_on_node_pages_writeback(sbi, ino); - if (ret) - goto out; + /* + * If it's atomic_write, it's just fine to keep write ordering. So + * here we don't need to wait for node write completion, since we use + * node chain which serializes node blocks. If one of node writes are + * reordered, we can see simply broken chain, resulting in stopping + * roll-forward recovery. It means we'll recover all or none node blocks + * given fsync mark. + */ + if (!atomic) { + ret = wait_on_node_pages_writeback(sbi, ino); + if (ret) + goto out; + } /* once recovery info is written, don't need to tack this */ remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - remove_ino_entry(sbi, ino, UPDATE_INO); - clear_inode_flag(inode, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(sbi); + if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) + ret = f2fs_issue_flush(sbi, inode->i_ino); + if (!ret) { + remove_ino_entry(sbi, ino, UPDATE_INO); + clear_inode_flag(inode, FI_UPDATE_WRITE); + remove_ino_entry(sbi, ino, FLUSH_INO); + } f2fs_update_time(sbi, REQ_TIME); out: - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); f2fs_trace_ios(NULL, 1); return ret; } int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) + return -EIO; return f2fs_do_sync_file(file, start, end, datasync, false); } @@ -376,7 +413,8 @@ static loff_t f2fs_seek_block(struct fil dn.ofs_in_node++, pgofs++, data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; - blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); if (__found_offset(blkaddr, dirty, pgofs, whence)) { f2fs_put_dnode(&dn); @@ -424,13 +462,8 @@ static int f2fs_file_mmap(struct file *f struct inode *inode = file_inode(file); int err; - if (f2fs_encrypted_inode(inode)) { - err = fscrypt_get_encryption_info(inode); - if (err) - return 0; - if (!f2fs_encrypted_inode(inode)) - return -ENOKEY; - } + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); @@ -444,35 +477,29 @@ static int f2fs_file_mmap(struct file *f static int f2fs_file_open(struct inode *inode, struct file *filp) { - int ret = generic_file_open(inode, filp); - struct dentry *dir; + int err = fscrypt_file_open(inode, filp); - if (!ret && f2fs_encrypted_inode(inode)) { - ret = fscrypt_get_encryption_info(inode); - if (ret) - return -EACCES; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } - dir = dget_parent(file_dentry(filp)); - if (f2fs_encrypted_inode(d_inode(dir)) && - !fscrypt_has_permitted_context(d_inode(dir), inode)) { - dput(dir); - return -EPERM; - } - dput(dir); - return ret; + if (err) + return err; + + filp->f_mode |= FMODE_NOWAIT; + + return dquot_file_open(inode, filp); } -int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +void truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_node *raw_node; int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; + int base = 0; + + if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) + base = get_extra_isize(dn->inode); raw_node = F2FS_NODE(dn->node_page); - addr = blkaddr_in_node(raw_node) + ofs; + addr = blkaddr_in_node(raw_node) + base + ofs; for (; count > 0; count--, addr++, dn->ofs_in_node++) { block_t blkaddr = le32_to_cpu(*addr); @@ -503,7 +530,6 @@ int truncate_data_blocks_range(struct dn f2fs_update_time(sbi, REQ_TIME); trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, dn->ofs_in_node, nr_free); - return nr_free; } void truncate_data_blocks(struct dnode_of_data *dn) @@ -532,12 +558,14 @@ static int truncate_partial_data_page(st page = get_lock_data_page(inode, index, true); if (IS_ERR(page)) - return 0; + return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: f2fs_wait_on_page_writeback(page, DATA, true); zero_user(page, offset, PAGE_SIZE - offset); - if (!cache_only || !f2fs_encrypted_inode(inode) || - !S_ISREG(inode->i_mode)) + + /* An encrypted inode should have a key and truncate the last page. */ + f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode)); + if (!cache_only) set_page_dirty(page); f2fs_put_page(page, 1); return 0; @@ -546,7 +574,6 @@ truncate_out: int truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; @@ -555,7 +582,7 @@ int truncate_blocks(struct inode *inode, trace_f2fs_truncate_blocks_enter(inode, from); - free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= sbi->max_file_blocks) goto free_partial; @@ -570,8 +597,7 @@ int truncate_blocks(struct inode *inode, } if (f2fs_has_inline_data(inode)) { - if (truncate_inline_inode(ipage, from)) - set_page_dirty(ipage); + truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); truncate_page = true; goto out; @@ -614,12 +640,21 @@ int f2fs_truncate(struct inode *inode) { int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return 0; trace_f2fs_truncate(inode); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { + f2fs_show_injection_info(FAULT_TRUNCATE); + return -EIO; + } +#endif /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -632,16 +667,52 @@ int f2fs_truncate(struct inode *inode) return err; inode->i_mtime = inode->i_ctime = current_time(inode); - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, false); return 0; } int f2fs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) + struct dentry *dentry, struct kstat *stat) { struct inode *inode = d_inode(dentry); +#if 0 + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode *ri; + unsigned int flags; + + if (f2fs_has_extra_attr(inode) && + f2fs_sb_has_inode_crtime(inode->i_sb) && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = fi->i_crtime.tv_sec; + stat->btime.tv_nsec = fi->i_crtime.tv_nsec; + } + + flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL); + if (flags & FS_APPEND_FL) + stat->attributes |= STATX_ATTR_APPEND; + if (flags & FS_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; + if (f2fs_encrypted_inode(inode)) + stat->attributes |= STATX_ATTR_ENCRYPTED; + if (flags & FS_IMMUTABLE_FL) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (flags & FS_NODUMP_FL) + stat->attributes |= STATX_ATTR_NODUMP; + + stat->attributes_mask |= (STATX_ATTR_APPEND | + STATX_ATTR_COMPRESSED | + STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP); +#endif generic_fillattr(inode, stat); - stat->blocks <<= 3; + + /* we need to show initial sectors used for inline_data/dentries */ + if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || + f2fs_has_inline_dentry(inode)) + stat->blocks += (stat->size + 511) >> 9; + return 0; } @@ -679,28 +750,49 @@ int f2fs_setattr(struct dentry *dentry, { struct inode *inode = d_inode(dentry); int err; + bool size_changed = false; + + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; err = setattr_prepare(dentry, attr); if (err) return err; - if (attr->ia_valid & ATTR_SIZE) { - if (f2fs_encrypted_inode(inode) && - fscrypt_get_encryption_info(inode)) - return -EACCES; + err = fscrypt_prepare_setattr(dentry, attr); + if (err) + return err; + + if (is_quota_modification(inode, attr)) { + err = dquot_initialize(inode); + if (err) + return err; + } + if ((attr->ia_valid & ATTR_UID && + !uid_eq(attr->ia_uid, inode->i_uid)) || + (attr->ia_valid & ATTR_GID && + !gid_eq(attr->ia_gid, inode->i_gid))) { + err = dquot_transfer(inode, attr); + if (err) + return err; + } + if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_size <= i_size_read(inode)) { + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); err = f2fs_truncate(inode); + up_write(&F2FS_I(inode)->i_mmap_sem); if (err) return err; - f2fs_balance_fs(F2FS_I_SB(inode), true); } else { /* * do not trim all blocks after i_size if target size is * larger than i_size. */ + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); + up_write(&F2FS_I(inode)->i_mmap_sem); /* should convert inline inode here */ if (!f2fs_may_inline_data(inode)) { @@ -710,6 +802,12 @@ int f2fs_setattr(struct dentry *dentry, } inode->i_mtime = inode->i_ctime = current_time(inode); } + + down_write(&F2FS_I(inode)->i_sem); + F2FS_I(inode)->last_disk_size = i_size_read(inode); + up_write(&F2FS_I(inode)->i_sem); + + size_changed = true; } __setattr_copy(inode, attr); @@ -722,7 +820,12 @@ int f2fs_setattr(struct dentry *dentry, } } - f2fs_mark_inode_dirty_sync(inode); + /* file size may changed here */ + f2fs_mark_inode_dirty_sync(inode, size_changed); + + /* inode change will produce dirty node pages flushed by checkpoint */ + f2fs_balance_fs(F2FS_I_SB(inode), true); + return err; } @@ -774,7 +877,7 @@ int truncate_hole(struct inode *inode, p err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); if (err) { if (err == -ENOENT) { - pg_start++; + pg_start = get_next_page_offset(&dn, pg_start); continue; } return err; @@ -836,12 +939,14 @@ static int punch_hole(struct inode *inod blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_inode_pages_range(mapping, blk_start, blk_end - 1); f2fs_lock_op(sbi); ret = truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); } } @@ -872,7 +977,8 @@ next_dnode: done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { - *blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + *blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); if (!is_checkpointed_data(sbi, *blkaddr)) { if (test_opt(sbi, LFS)) { @@ -948,15 +1054,15 @@ static int __clone_blkaddrs(struct inode ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); do { - dn.data_blkaddr = datablock_addr(dn.node_page, - dn.ofs_in_node); + dn.data_blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node); truncate_data_blocks_range(&dn, 1); if (do_replace[i]) { f2fs_i_blocks_write(src_inode, - 1, false); + 1, false, false); f2fs_i_blocks_write(dst_inode, - 1, true); + 1, true, false); f2fs_replace_block(sbi, &dn, dn.data_blkaddr, blkaddr[i], ni.version, true, false); @@ -1008,11 +1114,13 @@ static int __exchange_data_block(struct while (len) { olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); - src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); + src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), + sizeof(block_t) * olen, GFP_KERNEL); if (!src_blkaddr) return -ENOMEM; - do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL); + do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), + sizeof(int) * olen, GFP_KERNEL); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; @@ -1080,16 +1188,20 @@ static int f2fs_collapse_range(struct in pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + + down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - return ret; + goto out_unlock; truncate_pagecache(inode, offset); ret = f2fs_do_collapse(inode, pg_start, pg_end); if (ret) - return ret; + goto out_unlock; /* write out all moved pages, if possible */ filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -1101,7 +1213,9 @@ static int f2fs_collapse_range(struct in ret = truncate_blocks(inode, new_size, true); if (!ret) f2fs_i_size_write(inode, new_size); - +out_unlock: + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } @@ -1115,7 +1229,8 @@ static int f2fs_do_zero_range(struct dno int ret; for (; index < end; index++, dn->ofs_in_node++) { - if (datablock_addr(dn->node_page, dn->ofs_in_node) == NULL_ADDR) + if (datablock_addr(dn->inode, dn->node_page, + dn->ofs_in_node) == NULL_ADDR) count++; } @@ -1126,8 +1241,8 @@ static int f2fs_do_zero_range(struct dno dn->ofs_in_node = ofs_in_node; for (index = start; index < end; index++, dn->ofs_in_node++) { - dn->data_blkaddr = - datablock_addr(dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = datablock_addr(dn->inode, + dn->node_page, dn->ofs_in_node); /* * reserve_new_blocks will not guarantee entire block * allocation. @@ -1166,9 +1281,10 @@ static int f2fs_zero_range(struct inode if (ret) return ret; + down_write(&F2FS_I(inode)->i_mmap_sem); ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) - return ret; + goto out_sem; truncate_pagecache_range(inode, offset, offset + len - 1); @@ -1182,17 +1298,15 @@ static int f2fs_zero_range(struct inode ret = fill_zero(inode, pg_start, off_start, off_end - off_start); if (ret) - return ret; + goto out_sem; - if (offset + len > new_size) - new_size = offset + len; new_size = max_t(loff_t, new_size, offset + len); } else { if (off_start) { ret = fill_zero(inode, pg_start++, off_start, PAGE_SIZE - off_start); if (ret) - return ret; + goto out_sem; new_size = max_t(loff_t, new_size, (loff_t)pg_start << PAGE_SHIFT); @@ -1218,6 +1332,9 @@ static int f2fs_zero_range(struct inode ret = f2fs_do_zero_range(&dn, index, end); f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); + + f2fs_balance_fs(sbi, dn.node_changed); + if (ret) goto out; @@ -1236,8 +1353,14 @@ static int f2fs_zero_range(struct inode } out: - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } +out_sem: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1250,8 +1373,9 @@ static int f2fs_insert_range(struct inod int ret = 0; new_size = i_size_read(inode) + len; - if (new_size > inode->i_sb->s_maxbytes) - return -EFBIG; + ret = inode_newsize_ok(inode, new_size); + if (ret) + return ret; if (offset >= i_size_read(inode)) return -EINVAL; @@ -1266,14 +1390,18 @@ static int f2fs_insert_range(struct inod f2fs_balance_fs(sbi, true); + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + + down_write(&F2FS_I(inode)->i_mmap_sem); ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) - return ret; + goto out; /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - return ret; + goto out; truncate_pagecache(inode, offset); @@ -1302,6 +1430,9 @@ static int f2fs_insert_range(struct inod if (!ret) f2fs_i_size_write(inode, new_size); +out: + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } @@ -1309,19 +1440,20 @@ static int expand_inode_data(struct inod loff_t len, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; + struct f2fs_map_blocks map = { .m_next_pgofs = NULL, + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; - int ret; + int err; - ret = inode_newsize_ok(inode, (len + offset)); - if (ret) - return ret; + err = inode_newsize_ok(inode, (len + offset)); + if (err) + return err; - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; + err = f2fs_convert_inline_inode(inode); + if (err) + return err; f2fs_balance_fs(sbi, true); @@ -1333,12 +1465,12 @@ static int expand_inode_data(struct inod if (off_end) map.m_len++; - ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); - if (ret) { + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (err) { pgoff_t last_off; if (!map.m_len) - return ret; + return err; last_off = map.m_lblk + map.m_len - 1; @@ -1349,10 +1481,14 @@ static int expand_inode_data(struct inod new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; } - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } - return ret; + return err; } static long f2fs_fallocate(struct file *file, int mode, @@ -1361,6 +1497,9 @@ static long f2fs_fallocate(struct file * struct inode *inode = file_inode(file); long ret = 0; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* f2fs only support ->fallocate for regular file */ if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -1393,7 +1532,7 @@ static long f2fs_fallocate(struct file * if (!ret) { inode->i_mtime = inode->i_ctime = current_time(inode); - f2fs_mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode, false); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1419,6 +1558,7 @@ static int f2fs_release_file(struct inod drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); set_inode_flag(inode, FI_DROP_CACHE); filemap_fdatawrite(inode->i_mapping); clear_inode_flag(inode, FI_DROP_CACHE); @@ -1426,17 +1566,20 @@ static int f2fs_release_file(struct inod return 0; } -#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) -#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) - -static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) +static int f2fs_file_flush(struct file *file, fl_owner_t id) { - if (S_ISDIR(mode)) - return flags; - else if (S_ISREG(mode)) - return flags & F2FS_REG_FLMASK; - else - return flags & F2FS_OTHER_FLMASK; + struct inode *inode = file_inode(file); + + /* + * If the process doing a transaction is crashed, we should do + * roll-back. Otherwise, other reader/write can see corrupted database + * until all the writers close its file. Since this should be done + * before dropping file lock, it needs to do in ->flush. + */ + if (f2fs_is_atomic_file(inode) && + F2FS_I(inode)->inmem_task == current) + drop_inmem_pages(inode); + return 0; } static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) @@ -1465,28 +1608,34 @@ static int f2fs_ioc_setflags(struct file if (ret) return ret; - flags = f2fs_mask_flags(inode->i_mode, flags); - inode_lock(inode); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + ret = -EPERM; + goto unlock_out; + } + + flags = f2fs_mask_flags(inode->i_mode, flags); + oldflags = fi->i_flags; if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { - inode_unlock(inode); ret = -EPERM; - goto out; + goto unlock_out; } } flags = flags & FS_FL_USER_MODIFIABLE; flags |= oldflags & ~FS_FL_USER_MODIFIABLE; fi->i_flags = flags; - inode_unlock(inode); inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); -out: + f2fs_mark_inode_dirty_sync(inode, false); +unlock_out: + inode_unlock(inode); mnt_drop_write_file(filp); return ret; } @@ -1506,6 +1655,9 @@ static int f2fs_ioc_start_atomic_write(s if (!inode_owner_or_capable(inode)) return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -1522,17 +1674,26 @@ static int f2fs_ioc_start_atomic_write(s goto out; set_inode_flag(inode, FI_ATOMIC_FILE); + set_inode_flag(inode, FI_HOT_DATA); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (!get_dirty_pages(inode)) - goto out; + goto inc_stat; f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) + if (ret) { clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); + goto out; + } + +inc_stat: + F2FS_I(inode)->inmem_task = current; + stat_inc_atomic_write(inode); + stat_update_max_atomic_write(inode); out: up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); inode_unlock(inode); @@ -1554,20 +1715,27 @@ static int f2fs_ioc_commit_atomic_write( inode_lock(inode); + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + if (f2fs_is_volatile_file(inode)) goto err_out; if (f2fs_is_atomic_file(inode)) { - clear_inode_flag(inode, FI_ATOMIC_FILE); ret = commit_inmem_pages(inode); - if (ret) { - set_inode_flag(inode, FI_ATOMIC_FILE); + if (ret) goto err_out; + + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); + if (!ret) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); + stat_dec_atomic_write(inode); } + } else { + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } - - ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); err_out: + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1581,6 +1749,9 @@ static int f2fs_ioc_start_volatile_write if (!inode_owner_or_capable(inode)) return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -1594,6 +1765,9 @@ static int f2fs_ioc_start_volatile_write if (ret) goto out; + stat_inc_volatile_write(inode); + stat_update_max_volatile_write(inode); + set_inode_flag(inode, FI_VOLATILE_FILE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); out: @@ -1649,6 +1823,7 @@ static int f2fs_ioc_abort_volatile_write drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } @@ -1682,27 +1857,40 @@ static int f2fs_ioc_shutdown(struct file switch (in) { case F2FS_GOING_DOWN_FULLSYNC: sb = freeze_bdev(sb->s_bdev); - if (sb && !IS_ERR(sb)) { + if (IS_ERR(sb)) { + ret = PTR_ERR(sb); + goto out; + } + if (sb) { f2fs_stop_checkpoint(sbi, false); thaw_bdev(sb->s_bdev, sb); } break; case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ - f2fs_sync_fs(sb, 1); + ret = f2fs_sync_fs(sb, 1); + if (ret) + goto out; f2fs_stop_checkpoint(sbi, false); break; case F2FS_GOING_DOWN_NOSYNC: f2fs_stop_checkpoint(sbi, false); break; case F2FS_GOING_DOWN_METAFLUSH: - sync_meta_pages(sbi, META, LONG_MAX); + sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); f2fs_stop_checkpoint(sbi, false); break; default: ret = -EINVAL; goto out; } + + stop_gc_thread(sbi); + stop_discard_thread(sbi); + + drop_discard_cmd(sbi); + clear_opt(sbi, DISCARD); + f2fs_update_time(sbi, REQ_TIME); out: if (in != F2FS_GOING_DOWN_FULLSYNC) @@ -1758,31 +1946,21 @@ static bool uuid_is_nonzero(__u8 u[16]) static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { - struct fscrypt_policy policy; struct inode *inode = file_inode(filp); - if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, - sizeof(policy))) - return -EFAULT; + if (!f2fs_sb_has_encrypt(inode->i_sb)) + return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - return fscrypt_process_policy(filp, &policy); + return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); } static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - struct fscrypt_policy policy; - struct inode *inode = file_inode(filp); - int err; - - err = fscrypt_get_policy(inode, &policy); - if (err) - return err; - - if (copy_to_user((struct fscrypt_policy __user *)arg, &policy, sizeof(policy))) - return -EFAULT; - return 0; + if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) + return -EOPNOTSUPP; + return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) @@ -1791,16 +1969,18 @@ static int f2fs_ioc_get_encryption_pwsal struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; - if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) - goto got_it; - err = mnt_want_write_file(filp); if (err) return err; + down_write(&sbi->sb_lock); + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + /* update superblock with uuid */ generate_random_uuid(sbi->raw_super->encrypt_pw_salt); @@ -1808,15 +1988,16 @@ static int f2fs_ioc_get_encryption_pwsal if (err) { /* undo new data */ memset(sbi->raw_super->encrypt_pw_salt, 0, 16); - mnt_drop_write_file(filp); - return err; + goto out_err; } - mnt_drop_write_file(filp); got_it: if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 16)) - return -EFAULT; - return 0; + err = -EFAULT; +out_err: + up_write(&sbi->sb_lock); + mnt_drop_write_file(filp); + return err; } static int f2fs_ioc_gc(struct file *filp, unsigned long arg) @@ -1848,7 +2029,53 @@ static int f2fs_ioc_gc(struct file *filp mutex_lock(&sbi->gc_mutex); } - ret = f2fs_gc(sbi, sync); + ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); +out: + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_gc_range range; + u64 end; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, + sizeof(range))) + return -EFAULT; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + end = range.start + range.len; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + ret = -EINVAL; + goto out; + } +do_more: + if (!range.sync) { + if (!mutex_trylock(&sbi->gc_mutex)) { + ret = -EBUSY; + goto out; + } + } else { + mutex_lock(&sbi->gc_mutex); + } + + ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); + range.start += sbi->blocks_per_seg; + if (range.start <= end) + goto do_more; out: mnt_drop_write_file(filp); return ret; @@ -1881,18 +2108,18 @@ static int f2fs_defragment_range(struct struct f2fs_defragment *range) { struct inode *inode = file_inode(filp); - struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; - struct extent_info ei; - pgoff_t pg_start, pg_end; + struct f2fs_map_blocks map = { .m_next_extent = NULL, + .m_seg_type = NO_CHECK_TYPE }; + struct extent_info ei = {0,0,0}; + pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; - unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg; block_t blk_end = 0; bool fragmented = false; int err; /* if in-place-update policy is enabled, don't waste time here */ - if (need_inplace_update(inode)) + if (should_update_inplace(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; @@ -1918,6 +2145,7 @@ static int f2fs_defragment_range(struct } map.m_lblk = pg_start; + map.m_next_pgofs = &next_pgofs; /* * lookup mapping info in dnode page cache, skip defragmenting if all @@ -1926,19 +2154,21 @@ static int f2fs_defragment_range(struct */ while (map.m_lblk < pg_end) { map.m_len = pg_end - map.m_lblk; - err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); if (err) goto out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { - map.m_lblk++; + map.m_lblk = next_pgofs; continue; } - if (blk_end && blk_end != map.m_pblk) { + if (blk_end && blk_end != map.m_pblk) fragmented = true; - break; - } + + /* record total count of block that we're going to move */ + total += map.m_len; + blk_end = map.m_pblk + map.m_len; map.m_lblk += map.m_len; @@ -1947,10 +2177,7 @@ static int f2fs_defragment_range(struct if (!fragmented) goto out; - map.m_lblk = pg_start; - map.m_len = pg_end - pg_start; - - sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec; + sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); /* * make sure there are enough free section for LFS allocation, this can @@ -1962,18 +2189,22 @@ static int f2fs_defragment_range(struct goto out; } + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; + total = 0; + while (map.m_lblk < pg_end) { pgoff_t idx; int cnt = 0; do_map: map.m_len = pg_end - map.m_lblk; - err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); if (err) goto clear_out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { - map.m_lblk++; + map.m_lblk = next_pgofs; continue; } @@ -2027,42 +2258,40 @@ static int f2fs_ioc_defragment(struct fi if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!S_ISREG(inode->i_mode)) + if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode)) return -EINVAL; - err = mnt_want_write_file(filp); - if (err) - return err; - - if (f2fs_readonly(sbi->sb)) { - err = -EROFS; - goto out; - } + if (f2fs_readonly(sbi->sb)) + return -EROFS; if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, - sizeof(range))) { - err = -EFAULT; - goto out; - } + sizeof(range))) + return -EFAULT; /* verify alignment of offset & size */ - if (range.start & (F2FS_BLKSIZE - 1) || - range.len & (F2FS_BLKSIZE - 1)) { - err = -EINVAL; - goto out; - } + if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) + return -EINVAL; + + if (unlikely((range.start + range.len) >> PAGE_SHIFT > + sbi->max_file_blocks)) + return -EINVAL; + + err = mnt_want_write_file(filp); + if (err) + return err; err = f2fs_defragment_range(sbi, filp, &range); + mnt_drop_write_file(filp); + f2fs_update_time(sbi, REQ_TIME); if (err < 0) - goto out; + return err; if (copy_to_user((struct f2fs_defragment __user *)arg, &range, sizeof(range))) - err = -EFAULT; -out: - mnt_drop_write_file(filp); - return err; + return -EFAULT; + + return 0; } static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, @@ -2096,9 +2325,13 @@ static int f2fs_move_file_range(struct f } inode_lock(src); + down_write(&F2FS_I(src)->dio_rwsem[WRITE]); if (src != dst) { - if (!inode_trylock(dst)) { - ret = -EBUSY; + ret = -EBUSY; + if (!inode_trylock(dst)) + goto out; + if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) { + inode_unlock(dst); goto out; } } @@ -2158,9 +2391,12 @@ static int f2fs_move_file_range(struct f } f2fs_unlock_op(sbi); out_unlock: - if (src != dst) + if (src != dst) { + up_write(&F2FS_I(dst)->dio_rwsem[WRITE]); inode_unlock(dst); + } out: + up_write(&F2FS_I(src)->dio_rwsem[WRITE]); inode_unlock(src); return ret; } @@ -2196,6 +2432,8 @@ static int f2fs_ioc_move_range(struct fi range.pos_out, range.len); mnt_drop_write_file(filp); + if (err) + goto err_out; if (copy_to_user((struct f2fs_move_range __user *)arg, &range, sizeof(range))) @@ -2205,8 +2443,203 @@ err_out: return err; } +static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct sit_info *sm = SIT_I(sbi); + unsigned int start_segno = 0, end_segno = 0; + unsigned int dev_start_segno = 0, dev_end_segno = 0; + struct f2fs_flush_device range; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, + sizeof(range))) + return -EFAULT; + + if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || + sbi->segs_per_sec != 1) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Can't flush %u in %d for segs_per_sec %u != 1\n", + range.dev_num, sbi->s_ndevs, + sbi->segs_per_sec); + return -EINVAL; + } + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (range.dev_num != 0) + dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); + dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); + + start_segno = sm->last_victim[FLUSH_DEVICE]; + if (start_segno < dev_start_segno || start_segno >= dev_end_segno) + start_segno = dev_start_segno; + end_segno = min(start_segno + range.segments, dev_end_segno); + + while (start_segno < end_segno) { + if (!mutex_trylock(&sbi->gc_mutex)) { + ret = -EBUSY; + goto out; + } + sm->last_victim[GC_CB] = end_segno + 1; + sm->last_victim[GC_GREEDY] = end_segno + 1; + sm->last_victim[ALLOC_NEXT] = end_segno + 1; + ret = f2fs_gc(sbi, true, true, start_segno); + if (ret == -EAGAIN) + ret = 0; + else if (ret < 0) + break; + start_segno++; + } +out: + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature); + + /* Must validate to set it with SQLite behavior in Android. */ + sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; + + return put_user(sb_feature, (u32 __user *)arg); +} + +int f2fs_pin_file_control(struct inode *inode, bool inc) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* Use i_gc_failures for normal file as a risk signal. */ + if (inc) + f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); + + if (fi->i_gc_failures > sbi->gc_pin_file_threshold) { + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: Enable GC = ino %lx after %x GC trials\n", + __func__, inode->i_ino, fi->i_gc_failures); + clear_inode_flag(inode, FI_PIN_FILE); + return -EAGAIN; + } + return 0; +} + +static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 pin; + int ret = 0; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (get_user(pin, (__u32 __user *)arg)) + return -EFAULT; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + inode_lock(inode); + + if (should_update_outplace(inode, NULL)) { + ret = -EINVAL; + goto out; + } + + if (!pin) { + clear_inode_flag(inode, FI_PIN_FILE); + F2FS_I(inode)->i_gc_failures = 1; + goto done; + } + + if (f2fs_pin_file_control(inode, false)) { + ret = -EAGAIN; + goto out; + } + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto out; + + set_inode_flag(inode, FI_PIN_FILE); + ret = F2FS_I(inode)->i_gc_failures; +done: + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); +out: + inode_unlock(inode); + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 pin = 0; + + if (is_inode_flag_set(inode, FI_PIN_FILE)) + pin = F2FS_I(inode)->i_gc_failures; + return put_user(pin, (u32 __user *)arg); +} + +int f2fs_precache_extents(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_map_blocks map; + pgoff_t m_next_extent; + loff_t end; + int err; + + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return -EOPNOTSUPP; + + map.m_lblk = 0; + map.m_next_pgofs = NULL; + map.m_next_extent = &m_next_extent; + map.m_seg_type = NO_CHECK_TYPE; + end = F2FS_I_SB(inode)->max_file_blocks; + + while (map.m_lblk < end) { + map.m_len = end - map.m_lblk; + + down_write(&fi->dio_rwsem[WRITE]); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); + up_write(&fi->dio_rwsem[WRITE]); + if (err) + return err; + + map.m_lblk = m_next_extent; + } + + return err; +} + +static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) +{ + return f2fs_precache_extents(file_inode(filp)); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) + return -EIO; + switch (cmd) { case F2FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); @@ -2236,12 +2669,24 @@ long f2fs_ioctl(struct file *filp, unsig return f2fs_ioc_get_encryption_pwsalt(filp, arg); case F2FS_IOC_GARBAGE_COLLECT: return f2fs_ioc_gc(filp, arg); + case F2FS_IOC_GARBAGE_COLLECT_RANGE: + return f2fs_ioc_gc_range(filp, arg); case F2FS_IOC_WRITE_CHECKPOINT: return f2fs_ioc_write_checkpoint(filp, arg); case F2FS_IOC_DEFRAGMENT: return f2fs_ioc_defragment(filp, arg); case F2FS_IOC_MOVE_RANGE: return f2fs_ioc_move_range(filp, arg); + case F2FS_IOC_FLUSH_DEVICE: + return f2fs_ioc_flush_device(filp, arg); + case F2FS_IOC_GET_FEATURES: + return f2fs_ioc_get_features(filp, arg); + case F2FS_IOC_GET_PIN_FILE: + return f2fs_ioc_get_pin_file(filp, arg); + case F2FS_IOC_SET_PIN_FILE: + return f2fs_ioc_set_pin_file(filp, arg); + case F2FS_IOC_PRECACHE_EXTENTS: + return f2fs_ioc_precache_extents(filp, arg); default: return -ENOTTY; } @@ -2254,20 +2699,59 @@ static ssize_t f2fs_file_write_iter(stru struct blk_plug plug; ssize_t ret; - if (f2fs_encrypted_inode(inode) && - !fscrypt_has_encryption_key(inode) && - fscrypt_get_encryption_info(inode)) - return -EACCES; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) + return -EINVAL; + + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } - inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { - ret = f2fs_preallocate_blocks(iocb, from); - if (!ret) { - blk_start_plug(&plug); - ret = __generic_file_write_iter(iocb, from); - blk_finish_plug(&plug); + bool preallocated = false; + size_t target_size = 0; + int err; + + if (iov_iter_fault_in_readable(from, iov_iter_count(from))) + set_inode_flag(inode, FI_NO_PREALLOC); + + if ((iocb->ki_flags & IOCB_NOWAIT) && + (iocb->ki_flags & IOCB_DIRECT)) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, + iov_iter_count(from)) || + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, WRITE)) { + inode_unlock(inode); + return -EAGAIN; + } + + } else { + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + return err; + } } + blk_start_plug(&plug); + ret = __generic_file_write_iter(iocb, from); + blk_finish_plug(&plug); + clear_inode_flag(inode, FI_NO_PREALLOC); + + /* if we couldn't write data, we should deallocate blocks. */ + if (preallocated && i_size_read(inode) < target_size) + f2fs_truncate(inode); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } inode_unlock(inode); @@ -2299,10 +2783,15 @@ long f2fs_compat_ioctl(struct file *file case F2FS_IOC_GET_ENCRYPTION_PWSALT: case F2FS_IOC_GET_ENCRYPTION_POLICY: case F2FS_IOC_GARBAGE_COLLECT: + case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: - break; case F2FS_IOC_MOVE_RANGE: + case F2FS_IOC_FLUSH_DEVICE: + case F2FS_IOC_GET_FEATURES: + case F2FS_IOC_GET_PIN_FILE: + case F2FS_IOC_SET_PIN_FILE: + case F2FS_IOC_PRECACHE_EXTENTS: break; default: return -ENOIOCTLCMD; @@ -2318,6 +2807,7 @@ const struct file_operations f2fs_file_o .open = f2fs_file_open, .release = f2fs_release_file, .mmap = f2fs_file_mmap, + .flush = f2fs_file_flush, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, .unlocked_ioctl = f2fs_ioctl,