diff --git a/Documentation/btrfs-rescue.rst b/Documentation/btrfs-rescue.rst index f52e6c2635..7fc2bde590 100644 --- a/Documentation/btrfs-rescue.rst +++ b/Documentation/btrfs-rescue.rst @@ -50,6 +50,34 @@ fix-device-size WARNING: CPU: 3 PID: 439 at fs/btrfs/ctree.h:1559 btrfs_update_device+0x1c5/0x1d0 [btrfs] +fix-data-checksum + fix data checksum mismatch + + There is a long-standing problem that if a user space program is doing + direct IO and modifies the buffer before the write back has finished, it + can lead to data checksum mismatches. + + This problem was known but not fixed until upstream release v6.15 + (backported to older kernels). So it's possible to hit false data + checksum mismatches on any long-running btrfs filesystem. + + In that case this command can be used to repair such problems. + + ``Options`` + + -r|--readonly + readonly mode, only scan and report data checksum mismatches, + do not repair + + -i|--interactive + interactive mode, ask how to repair, ignore the error by default + + -m|--mirror + use the specified mirror to update the checksum item for all corrupted blocks. + + The value must be >= 1, and if the corrupted block has fewer mirrors than + the value, the mirror number will be `num % (num_mirrors + 1)`. + .. 
_man-rescue-clear-ino-cache: clear-ino-cache diff --git a/Makefile b/Makefile index 7e36aa4257..523b834955 100644 --- a/Makefile +++ b/Makefile @@ -256,7 +256,7 @@ cmds_objects = cmds/subvolume.o cmds/subvolume-list.o \ cmds/inspect.o cmds/balance.o cmds/send.o cmds/receive.o \ cmds/quota.o cmds/qgroup.o cmds/replace.o check/main.o \ cmds/restore.o cmds/rescue.o cmds/rescue-chunk-recover.o \ - cmds/rescue-super-recover.o \ + cmds/rescue-super-recover.o cmds/rescue-fix-data-checksum.o \ cmds/property.o cmds/filesystem-usage.o cmds/inspect-dump-tree.o \ cmds/inspect-dump-super.o cmds/inspect-tree-stats.o cmds/filesystem-du.o \ cmds/reflink.o \ diff --git a/check/mode-lowmem.c b/check/mode-lowmem.c index 34af77f884..713ddc3d88 100644 --- a/check/mode-lowmem.c +++ b/check/mode-lowmem.c @@ -3977,6 +3977,139 @@ static int check_shared_block_backref(u64 parent, u64 bytenr, int level) return 0; } +/* + * A read-only version of lookup_inline_extent_backref(). + * We can not reuse that function as it always assume COW. 
+ */ +static int has_inline_shared_backref(u64 data_bytenr, u64 data_len, u64 parent) +{ + struct btrfs_root *extent_root = btrfs_extent_root(gfs_info, data_bytenr); + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_item *ei; + struct btrfs_path path = { 0 }; + struct extent_buffer *leaf; + struct btrfs_key key; + unsigned long ptr; + unsigned long end; + bool found = false; + u32 item_size; + u64 flags; + int ret; + + key.objectid = data_bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = data_len; + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret > 0) + ret = -ENOENT; + if (ret < 0) + goto out; + + leaf = path.nodes[0]; + item_size = btrfs_item_size(leaf, path.slots[0]); + if (item_size < sizeof(*ei)) { + error("extent item size %u < %zu, leaf %llu slot %u", + item_size, sizeof(*ei), leaf->start, path.slots[0]); + ret = -EUCLEAN; + goto out; + } + ei = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_extent_item); + flags = btrfs_extent_flags(leaf, ei); + + if (!(flags & BTRFS_EXTENT_FLAG_DATA)) { + error("backref item flag for bytenr %llu is not data", + data_bytenr); + ret = -EUCLEAN; + goto out; + } + + ptr = (unsigned long)(ei + 1); + end = (unsigned long)ei + item_size; + + while (true) { + u64 ref_parent; + u8 type; + + if (ptr >= end) { + if (ptr > end) { + error("inline extent item for %llu is not properly ended", + data_bytenr); + ret = -EUCLEAN; + goto out; + } + break; + } + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + if (type != BTRFS_SHARED_DATA_REF_KEY) + goto next; + + ref_parent = btrfs_extent_inline_ref_offset(leaf, iref); + if (ref_parent == parent) { + found = true; + goto out; + } +next: + ptr += btrfs_extent_inline_ref_size(type); + } + +out: + btrfs_release_path(&path); + if (ret < 0) + return ret; + return found; +} + +static int has_keyed_shared_backref(u64 data_bytenr, u64 parent) +{ + struct btrfs_root *extent_root = 
btrfs_extent_root(gfs_info, data_bytenr); + struct btrfs_path path = { 0 }; + struct btrfs_key key; + int ret; + + key.objectid = data_bytenr; + key.type = BTRFS_SHARED_DATA_REF_KEY; + key.offset = parent; + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + btrfs_release_path(&path); + if (ret < 0) + return ret; + /* No keyed ref found, return 0. */ + if (ret > 0) + return 0; + return 1; +} + +/* + * A helper to determine if the @leaf already belongs to a shared data backref item. + * (with parent bytenr) + * + * Return >0 if the @leaf belongs to a shared data backref. + * Return 0 if not. + * Return <0 for critical error. + */ +static int is_leaf_shared(struct extent_buffer *leaf, u64 data_bytenr, u64 data_len) +{ + int ret; + + ret = has_inline_shared_backref(data_bytenr, data_len, leaf->start); + if (ret < 0) { + errno = -ret; + error("failed to search inlined shared backref for logical %llu len %llu, %m", + data_bytenr, data_len); + return ret; + } + if (ret > 0) + return ret; + ret = has_keyed_shared_backref(data_bytenr, leaf->start); + if (ret < 0) { + errno = -ret; + error("failed to search keyed shared backref for logical %llu len %llu, %m", + data_bytenr, data_len); + } + return ret; +} + /* * Check referencer for normal (inlined) data ref * If len == 0, it will be resolved by searching in extent tree @@ -4049,13 +4182,13 @@ static int check_extent_data_backref(u64 root_id, u64 objectid, u64 offset, btrfs_header_owner(leaf) != root_id) goto next; /* - * For tree blocks have been relocated, data backref are - * shared instead of keyed. Do not account it. + * If the node belongs to a shared backref item, we should not + * account the number. */ - if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { - /* - * skip the leaf to speed up. 
- */ + ret = is_leaf_shared(leaf, bytenr, len); + if (ret < 0) + break; + if (ret > 0) { slot = btrfs_header_nritems(leaf); goto next; } diff --git a/cmds/replace.c b/cmds/replace.c index 5f1222b241..887c3251a7 100644 --- a/cmds/replace.c +++ b/cmds/replace.c @@ -319,12 +319,11 @@ static int cmd_replace_start(const struct cmd_struct *cmd, ret = ioctl(fdmnt, BTRFS_IOC_DEV_REPLACE, &start_args); if (do_not_background) { if (ret < 0) { - error("ioctl(DEV_REPLACE_START) failed on \"%s\": %m", path); - if (start_args.result != BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT) - pr_stderr(LOG_DEFAULT, ", %s\n", - replace_dev_result2string(start_args.result)); + if (start_args.result == BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_RESULT) + error("ioctl(DEV_REPLACE_START) failed on \"%s\": %m", path); else - pr_stderr(LOG_DEFAULT, "\n"); + error("ioctl(DEV_REPLACE_START) failed on \"%s\": %m, %s", + path, replace_dev_result2string(start_args.result)); if (errno == EOPNOTSUPP) warning("device replace of RAID5/6 not supported with this kernel"); diff --git a/cmds/rescue-fix-data-checksum.c b/cmds/rescue-fix-data-checksum.c new file mode 100644 index 0000000000..23b59fffe2 --- /dev/null +++ b/cmds/rescue-fix-data-checksum.c @@ -0,0 +1,511 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include +#include "kerncompat.h" +#include "kernel-shared/disk-io.h" +#include "kernel-shared/ctree.h" +#include "kernel-shared/volumes.h" +#include "kernel-shared/backref.h" +#include "kernel-shared/transaction.h" +#include "kernel-shared/file-item.h" +#include "common/messages.h" +#include "common/open-utils.h" +#include "cmds/rescue.h" + +/* + * Record one corrupted data blocks. + * + * We do not report immediately, this is for future file deleting support. + */ +struct corrupted_block { + struct list_head list; + /* The logical bytenr of the exact corrupted block. */ + u64 logical; + + /* The amount of mirrors above logical have. */ + unsigned int num_mirrors; + + /* + * Which mirror failed. + * + * Note, bit 0 means mirror 1, since mirror 0 means choosing a + * live mirror, and we never utilized that mirror 0. + */ + unsigned long *error_mirror_bitmap; +}; + +enum fix_data_checksum_action_value { + ACTION_IGNORE, + ACTION_UPDATE_CSUM, + ACTION_LAST, +}; + +static const struct fix_data_checksum_action { + enum fix_data_checksum_action_value value; + const char *string; +} actions[] = { + [ACTION_IGNORE] = { + .value = ACTION_IGNORE, + .string = "ignore", + }, + [ACTION_UPDATE_CSUM] = { + .value = ACTION_UPDATE_CSUM, + .string = "update-csum", + }, +}; + +static int global_repair_mode; +LIST_HEAD(corrupted_blocks); + +static int add_corrupted_block(struct btrfs_fs_info *fs_info, u64 logical, + unsigned int mirror, unsigned int num_mirrors) +{ + struct corrupted_block *last; + if (list_empty(&corrupted_blocks)) + goto add; + + last = list_entry(corrupted_blocks.prev, struct corrupted_block, list); + /* The last entry is the same, just set update the error mirror bitmap. 
*/ + if (last->logical == logical) { + UASSERT(last->error_mirror_bitmap); + set_bit(mirror - 1, last->error_mirror_bitmap); + return 0; + } +add: + last = calloc(1, sizeof(*last)); + if (!last) + return -ENOMEM; + last->error_mirror_bitmap = calloc(BITS_TO_LONGS(num_mirrors), sizeof(unsigned long)); + if (!last->error_mirror_bitmap) { + free(last); + return -ENOMEM; + } + set_bit(mirror - 1, last->error_mirror_bitmap); + last->logical = logical; + last->num_mirrors = num_mirrors; + + list_add_tail(&last->list, &corrupted_blocks); + return 0; +} + +/* + * Verify all mirrors for @logical. + * + * If something critical happened, return <0 and should end the run immediately. + * Otherwise return 0, including data checksum mismatch or read failure. + */ +static int verify_one_data_block(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + unsigned long leaf_offset, u64 logical, + unsigned int num_mirrors) +{ + const u32 blocksize = fs_info->sectorsize; + const u32 csum_size = fs_info->csum_size; + u8 *buf; + u8 csum[BTRFS_CSUM_SIZE]; + u8 csum_expected[BTRFS_CSUM_SIZE]; + int ret = 0; + + buf = malloc(blocksize); + if (!buf) + return -ENOMEM; + + for (int mirror = 1; mirror <= num_mirrors; mirror++) { + u64 read_len = blocksize; + + ret = read_data_from_disk(fs_info, buf, logical, &read_len, mirror); + if (ret < 0) { + /* IO error, add one record. */ + ret = add_corrupted_block(fs_info, logical, mirror, num_mirrors); + if (ret < 0) + break; + } + /* Verify the data checksum. 
*/ + btrfs_csum_data(fs_info, fs_info->csum_type, buf, csum, blocksize); + read_extent_buffer(leaf, csum_expected, leaf_offset, csum_size); + if (memcmp(csum_expected, csum, csum_size) != 0) { + ret = add_corrupted_block(fs_info, logical, mirror, num_mirrors); + if (ret < 0) + break; + } + } + + free(buf); + return ret; +} + +static int iterate_one_csum_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path) +{ + struct btrfs_key key; + const unsigned long item_ptr_off = btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]); + const u32 blocksize = fs_info->sectorsize; + int num_mirrors; + u64 data_size; + u64 cur; + char *buf; + int ret = 0; + + buf = malloc(blocksize); + if (!buf) + return -ENOMEM; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + data_size = btrfs_item_size(path->nodes[0], path->slots[0]) / + fs_info->csum_size * blocksize; + num_mirrors = btrfs_num_copies(fs_info, key.offset, data_size); + + for (cur = 0; cur < data_size; cur += blocksize) { + const unsigned long leaf_offset = item_ptr_off + + cur / blocksize * fs_info->csum_size; + + ret = verify_one_data_block(fs_info, path->nodes[0], leaf_offset, + key.offset + cur, num_mirrors); + if (ret < 0) + break; + } + free(buf); + return ret; +} + +static int print_filenames(u64 ino, u64 offset, u64 rootid, void *ctx) +{ + struct btrfs_fs_info *fs_info = ctx; + struct btrfs_root *root; + struct btrfs_key key; + struct inode_fs_paths *ipath; + struct btrfs_path path = { 0 }; + int ret; + + key.objectid = rootid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + errno = -ret; + error("failed to get subvolume %llu: %m", rootid); + return ret; + } + ipath = init_ipath(128 * BTRFS_PATH_NAME_MAX, root, &path); + if (IS_ERR(ipath)) { + ret = PTR_ERR(ipath); + errno = -ret; + error("failed to initialize ipath: %m"); + return ret; + } + ret = paths_from_inode(ino, ipath); + if (ret < 0) 
{ + errno = -ret; + error("failed to resolve root %llu ino %llu to paths: %m", rootid, ino); + goto out; + } + for (int i = 0; i < ipath->fspath->elem_cnt; i++) + printf(" (subvolume %llu)/%s\n", rootid, (char *)ipath->fspath->val[i]); + if (ipath->fspath->elem_missed) + printf(" (subvolume %llu) %d files not printed\n", rootid, + ipath->fspath->elem_missed); +out: + free_ipath(ipath); + return ret; +} + +static int iterate_csum_root(struct btrfs_fs_info *fs_info, struct btrfs_root *csum_root) +{ + struct btrfs_path path = { 0 }; + struct btrfs_key key; + int ret; + + key.objectid = 0; + key.type = 0; + key.offset = 0; + + ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0); + if (ret < 0) { + errno = -ret; + error("failed to get the first tree block of csum tree: %m"); + return ret; + } + UASSERT(ret > 0); + while (true) { + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.type != BTRFS_EXTENT_CSUM_KEY) + goto next; + ret = iterate_one_csum_item(fs_info, &path); + if (ret < 0) + break; +next: + ret = btrfs_next_item(csum_root, &path); + if (ret > 0) { + ret = 0; + break; + } + if (ret < 0) { + errno = -ret; + error("failed to get next csum item: %m"); + } + } + btrfs_release_path(&path); + return ret; +} + +#define ASK_ACTION_BUFSIZE (32) +static enum fix_data_checksum_action_value ask_action(unsigned int num_mirrors, + unsigned int *mirror_ret) +{ + unsigned long ret; + char buf[ASK_ACTION_BUFSIZE] = { 0 }; + bool printed; + char *endptr; + +again: + printed = false; + for (int i = 0; i < ACTION_LAST; i++) { + if (printed) + printf("/"); + /* Mark Ignore as default */ + if (i == ACTION_IGNORE) { + printf("<<%c>>%s", toupper(actions[i].string[0]), + actions[i].string + 1); + } else if (i == ACTION_UPDATE_CSUM) { + /* + * For update-csum action, we need a mirror number, + * so output all valid mirrors numbers instead. 
+ */ + for (int cur_mirror = 1; cur_mirror <= num_mirrors; + cur_mirror++) + printf("<%u>", cur_mirror); + } else { + printf("<%c>%s", toupper(actions[i].string[0]), + actions[i].string + 1); + } + printed = true; + } + printf(":"); + fflush(stdout); + /* Default to Ignore if no action provided. */ + if (!fgets(buf, sizeof(buf) - 1, stdin)) + return ACTION_IGNORE; + if (buf[0] == '\n') + return ACTION_IGNORE; + /* Check exact match or matching the initial letter. */ + for (int i = 0; i < ACTION_LAST; i++) { + if ((strncasecmp(buf, actions[i].string, 1) == 0 || + strncasecmp(buf, actions[i].string, ASK_ACTION_BUFSIZE) == 0) && + actions[i].value != ACTION_UPDATE_CSUM) + return actions[i].value; + } + /* No match, check if it's some numeric string. */ + ret = strtoul(buf, &endptr, 10); + if (endptr == buf || ret == ULONG_MAX) { + /* No valid action found, retry. */ + warning("invalid action, please retry"); + goto again; + } + if (ret > num_mirrors || ret == 0) { + warning("invalid mirror number %lu, must be in range [1, %d], please retry", + ret, num_mirrors); + goto again; + } + *mirror_ret = ret; + return ACTION_UPDATE_CSUM; +} + +static int update_csum_item(struct btrfs_fs_info *fs_info, u64 logical, + unsigned int mirror) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *csum_root = btrfs_csum_root(fs_info, logical); + struct btrfs_path path = { 0 }; + struct btrfs_csum_item *citem; + u64 read_len = fs_info->sectorsize; + u8 csum[BTRFS_CSUM_SIZE] = { 0 }; + u8 *buf; + int ret; + + buf = malloc(fs_info->sectorsize); + if (!buf) + return -ENOMEM; + ret = read_data_from_disk(fs_info, buf, logical, &read_len, mirror); + if (ret < 0) { + errno = -ret; + error("failed to read block at logical %llu mirror %u: %m", + logical, mirror); + goto out; + } + trans = btrfs_start_transaction(csum_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + errno = -ret; + error_msg(ERROR_MSG_START_TRANS, "%m"); + goto out; + } + citem = btrfs_lookup_csum(trans, 
csum_root, &path, logical, + BTRFS_EXTENT_CSUM_OBJECTID, fs_info->csum_type, 1); + if (IS_ERR(citem)) { + ret = PTR_ERR(citem); + errno = -ret; + error("failed to find csum item for logical %llu: %m", logical); + btrfs_abort_transaction(trans, ret); + goto out; + } + btrfs_csum_data(fs_info, fs_info->csum_type, buf, csum, fs_info->sectorsize); + write_extent_buffer(path.nodes[0], csum, (unsigned long)citem, fs_info->csum_size); + btrfs_release_path(&path); + ret = btrfs_commit_transaction(trans, csum_root); + if (ret < 0) { + errno = -ret; + error_msg(ERROR_MSG_COMMIT_TRANS, "%m"); + } + printf("Csum item for logical %llu updated using data from mirror %u\n", + logical, mirror); +out: + free(buf); + btrfs_release_path(&path); + return ret; +} + +static void report_corrupted_blocks(struct btrfs_fs_info *fs_info, + enum btrfs_fix_data_checksum_mode mode, + unsigned int mirror) +{ + struct corrupted_block *entry; + struct btrfs_path path = { 0 }; + enum fix_data_checksum_action_value action; + + if (list_empty(&corrupted_blocks)) { + printf("No data checksum mismatch found\n"); + return; + } + + list_for_each_entry(entry, &corrupted_blocks, list) { + bool has_printed = false; + int ret; + + printf("logical=%llu corrupted mirrors=", entry->logical); + /* Poor man's bitmap print. */ + for (int i = 0; i < entry->num_mirrors; i++) { + if (test_bit(i, entry->error_mirror_bitmap)) { + if (has_printed) + printf(","); + /* + * Bit 0 means mirror 1, thus we need to increase + * the value by 1. 
+ */ + printf("%d", i + 1); + has_printed = true; + } + } + printf(" affected files:\n"); + ret = iterate_inodes_from_logical(entry->logical, fs_info, &path, + print_filenames, fs_info); + if (ret < 0) { + errno = -ret; + error("failed to iterate involved files: %m"); + break; + } + switch (mode) { + case BTRFS_FIX_DATA_CSUMS_INTERACTIVE: + action = ask_action(entry->num_mirrors, &mirror); + break; + case BTRFS_FIX_DATA_CSUMS_READONLY: + action = ACTION_IGNORE; + break; + case BTRFS_FIX_DATA_CSUMS_UPDATE_CSUM_ITEM: + action = ACTION_UPDATE_CSUM; + mirror = (mirror % (entry->num_mirrors + 1)) ?: 1; + break; + default: + UASSERT(0); + } + + switch (action) { + case ACTION_IGNORE: + break; + case ACTION_UPDATE_CSUM: + UASSERT(mirror > 0 && mirror <= entry->num_mirrors); + ret = update_csum_item(fs_info, entry->logical, mirror); + break; + default: + UASSERT(0); + } + } +} + +static void free_corrupted_blocks(void) +{ + while (!list_empty(&corrupted_blocks)) { + struct corrupted_block *entry; + + entry = list_entry(corrupted_blocks.next, struct corrupted_block, list); + list_del_init(&entry->list); + free(entry->error_mirror_bitmap); + free(entry); + } +} + +int btrfs_recover_fix_data_checksum(const char *path, + enum btrfs_fix_data_checksum_mode mode, + unsigned int mirror) +{ + struct btrfs_fs_info *fs_info; + struct btrfs_root *csum_root; + struct open_ctree_args oca = { 0 }; + int ret; + + if (mode >= BTRFS_FIX_DATA_CSUMS_LAST) + return -EINVAL; + + if (mode == BTRFS_FIX_DATA_CSUMS_UPDATE_CSUM_ITEM) + UASSERT(mirror > 0); + ret = check_mounted(path); + if (ret < 0) { + errno = -ret; + error("could not check mount status: %m"); + return ret; + } + if (ret > 0) { + error("%s is currently mounted", path); + return -EBUSY; + } + + global_repair_mode = mode; + oca.filename = path; + oca.flags = OPEN_CTREE_WRITES; + fs_info = open_ctree_fs_info(&oca); + if (!fs_info) { + error("failed to open btrfs at %s", path); + return -EIO; + } + csum_root = btrfs_csum_root(fs_info, 0); + if 
(!csum_root) { + error("failed to get csum root"); + ret = -EIO; + goto out_close; + } + ret = iterate_csum_root(fs_info, csum_root); + if (ret) { + errno = -ret; + error("failed to iterate csum tree: %m"); + } + report_corrupted_blocks(fs_info, mode, mirror); +out_close: + free_corrupted_blocks(); + close_ctree_fs_info(fs_info); + return ret; +} diff --git a/cmds/rescue.c b/cmds/rescue.c index c60bf11675..f575646c73 100644 --- a/cmds/rescue.c +++ b/cmds/rescue.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "kernel-lib/list.h" #include "kernel-shared/ctree.h" #include "kernel-shared/volumes.h" @@ -30,6 +31,7 @@ #include "kernel-shared/extent_io.h" #include "kernel-shared/accessors.h" #include "kernel-shared/uapi/btrfs_tree.h" +#include "common/string-utils.h" #include "common/messages.h" #include "common/utils.h" #include "common/help.h" @@ -275,6 +277,68 @@ static int cmd_rescue_fix_device_size(const struct cmd_struct *cmd, } static DEFINE_SIMPLE_COMMAND(rescue_fix_device_size, "fix-device-size"); +static const char * const cmd_rescue_fix_data_checksum_usage[] = { + "btrfs rescue fix-data-checksum ", + "Fix data checksum mismatches.", + "", + OPTLINE("-r|--readonly", "readonly mode, only report errors without repair"), + OPTLINE("-i|--interactive", "interactive mode, ignore the error by default."), + OPTLINE("-m|--mirror ", "update csum item using specified mirror"), + HELPINFO_INSERT_GLOBALS, + HELPINFO_INSERT_VERBOSE, + NULL +}; + +static int cmd_rescue_fix_data_checksum(const struct cmd_struct *cmd, + int argc, char **argv) +{ + enum btrfs_fix_data_checksum_mode mode = BTRFS_FIX_DATA_CSUMS_READONLY; + unsigned int mirror = 0; + int ret; + optind = 0; + + while (1) { + int c; + enum { GETOPT_VAL_DRYRUN = GETOPT_VAL_FIRST, }; + static const struct option long_options [] = { + {"readonly", no_argument, NULL, 'r'}, + {"interactive", no_argument, NULL, 'i'}, + {"mirror", required_argument, NULL, 'm'}, + {NULL, 0, NULL, 0}, + }; + c = 
getopt_long(argc, argv, "rim:", long_options, NULL); + if (c < 0) + break; + switch (c) { + case 'r': + mode = BTRFS_FIX_DATA_CSUMS_READONLY; + break; + case 'i': + mode = BTRFS_FIX_DATA_CSUMS_INTERACTIVE; + break; + case 'm': + mode = BTRFS_FIX_DATA_CSUMS_UPDATE_CSUM_ITEM; + mirror = arg_strtou64(optarg); + if (mirror == 0) { + error("invalid mirror number %u, must be >= 1", mirror); + return 1; + } + break; + default: + usage_unknown_option(cmd, argv); + } + } + if (check_argc_min(argc - optind, 1)) + return 1; + ret = btrfs_recover_fix_data_checksum(argv[optind], mode, mirror); + if (ret < 0) { + errno = -ret; + error("failed to fix data checksums: %m"); + } + return !!ret; +} +static DEFINE_SIMPLE_COMMAND(rescue_fix_data_checksum, "fix-data-checksum"); + static const char * const cmd_rescue_create_control_device_usage[] = { "btrfs rescue create-control-device", "Create /dev/btrfs-control (see 'CONTROL DEVICE' in btrfs(5))", @@ -527,6 +591,7 @@ static const struct cmd_group rescue_cmd_group = { &cmd_struct_rescue_super_recover, &cmd_struct_rescue_zero_log, &cmd_struct_rescue_fix_device_size, + &cmd_struct_rescue_fix_data_checksum, &cmd_struct_rescue_create_control_device, &cmd_struct_rescue_clear_ino_cache, &cmd_struct_rescue_clear_space_cache, diff --git a/cmds/rescue.h b/cmds/rescue.h index 5a9e46b7aa..f78ec436a9 100644 --- a/cmds/rescue.h +++ b/cmds/rescue.h @@ -20,7 +20,17 @@ #ifndef __BTRFS_RESCUE_H__ #define __BTRFS_RESCUE_H__ +enum btrfs_fix_data_checksum_mode { + BTRFS_FIX_DATA_CSUMS_READONLY, + BTRFS_FIX_DATA_CSUMS_INTERACTIVE, + BTRFS_FIX_DATA_CSUMS_UPDATE_CSUM_ITEM, + BTRFS_FIX_DATA_CSUMS_LAST, +}; + int btrfs_recover_superblocks(const char *path, int yes); int btrfs_recover_chunk_tree(const char *path, int yes); +int btrfs_recover_fix_data_checksum(const char *path, + enum btrfs_fix_data_checksum_mode mode, + unsigned int mirror); #endif diff --git a/kernel-shared/ctree.c b/kernel-shared/ctree.c index 3184c91617..f90de606e7 100644 --- 
a/kernel-shared/ctree.c +++ b/kernel-shared/ctree.c @@ -1246,6 +1246,17 @@ static void reada_for_search(struct btrfs_fs_info *fs_info, } } +/* + * Find the first key in @fs_root that matches all the following conditions: + * + * - key.objectid == @iobjectid + * - key.type == @key_type + * - key.offset >= @ioff + * + * Return 0 if such key can be found, and @found_key is updated. + * Return >0 if no such key can be found. + * Return <0 for critical errors. + */ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path, u64 iobjectid, u64 ioff, u8 key_type, struct btrfs_key *found_key) @@ -1280,10 +1291,10 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path, btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); if (found_key->type != key.type || - found_key->objectid != key.objectid) { + found_key->objectid != key.objectid) ret = 1; - goto out; - } + else + ret = 0; out: if (path != found_path) diff --git a/kernel-shared/file-item.c b/kernel-shared/file-item.c index 18791c0647..503ad657c6 100644 --- a/kernel-shared/file-item.c +++ b/kernel-shared/file-item.c @@ -112,7 +112,7 @@ int btrfs_insert_inline_extent(struct btrfs_trans_handle *trans, return err; } -static struct btrfs_csum_item * +struct btrfs_csum_item * btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/kernel-shared/file-item.h b/kernel-shared/file-item.h index cab0bc4e9c..5a5d8da102 100644 --- a/kernel-shared/file-item.h +++ b/kernel-shared/file-item.h @@ -89,6 +89,11 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *stack_fi); int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 logical, u64 csum_objectid, u32 csum_type, const char *data); +struct btrfs_csum_item * +btrfs_lookup_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 csum_objectid, u16 csum_type, int cow); int 
btrfs_insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, const char *buffer, size_t size, diff --git a/tests/fsck-tests/020-extent-ref-cases/keyed_data_ref_with_reloc_leaf.img b/tests/fsck-tests/020-extent-ref-cases/keyed_data_ref_with_reloc_leaf.img index 80345cf949..02c63ef7c4 100644 Binary files a/tests/fsck-tests/020-extent-ref-cases/keyed_data_ref_with_reloc_leaf.img and b/tests/fsck-tests/020-extent-ref-cases/keyed_data_ref_with_reloc_leaf.img differ