firmware/br-ext-chip-allwinner/board/v83x/kernel/patches/00000-fs_pstore_blkzone.c.p...

1319 lines
33 KiB
Diff

diff -drupN a/fs/pstore/blkzone.c b/fs/pstore/blkzone.c
--- a/fs/pstore/blkzone.c 1970-01-01 03:00:00.000000000 +0300
+++ b/fs/pstore/blkzone.c 2022-06-12 05:28:14.000000000 +0300
@@ -0,0 +1,1314 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * blkzone.c: Block device Oops/Panic logger
+ *
+ * Copyright (C) 2019 liaoweixiong <liaoweixiong@allwinnertech.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define MODNAME "pstore-blk"
+#define pr_fmt(fmt) MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/pstore.h>
+#include <linux/mount.h>
+#include <linux/printk.h>
+#include <linux/fs.h>
+#include <linux/pstore_blk.h>
+#include <linux/kdev_t.h>
+#include <linux/device.h>
+#include <linux/namei.h>
+#include <linux/fcntl.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+
+/**
+ * struct blkz_buffer - header of a zone as it is flushed to storage
+ *
+ * @sig: signature marking a valid header (BLK_SIG xor the zone's pstore type)
+ * @datalen: number of valid bytes in @data
+ * @start: offset into @data at which the stored bytes begin
+ * @data: zone data.
+ */
+struct blkz_buffer {
+#define BLK_SIG (0x43474244) /* DBGC */
+ uint32_t sig;
+ atomic_t datalen;
+ atomic_t start;
+ uint8_t data[];
+};
+
+/**
+ * struct blkz_dmesg_header - header stored in front of each dmesg record
+ * @magic: magic number identifying a dmesg header (DMESG_HEADER_MAGIC)
+ * @time: trigger time
+ * @compressed: whether the record is compressed
+ * @counter: oops/panic/OOM counter
+ * @reason: dump reason of the record (oops, panic or OOM)
+ * @data: zero-length marker for the bytes that follow the header
+ */
+struct blkz_dmesg_header {
+#define DMESG_HEADER_MAGIC 0x4dfc3ae5
+ uint32_t magic;
+ struct timespec64 time;
+ bool compressed;
+ uint32_t counter;
+ enum kmsg_dump_reason reason;
+ uint8_t data[0];
+};
+
+/**
+ * struct blkz_zone - zone information
+ * @off:
+ *	offset of the zone on the block device
+ * @type:
+ *	frontend type for this zone
+ * @name:
+ *	frontend name for this zone
+ * @buffer:
+ *	pointer to data buffer managed by this zone
+ * @oldbuf:
+ *	pointer to old data buffer. It is used for single zone such as pmsg,
+ *	keeping the data recovered from storage.
+ * @buffer_size:
+ *	bytes in @buffer->data
+ * @should_recover:
+ *	set when the zone data must be read back from storage
+ * @dirty:
+ *	mark whether the data in @buffer are dirty (not flushed to storage yet)
+ */
+struct blkz_zone {
+ unsigned long off;
+ const char *name;
+ enum pstore_type_id type;
+
+ struct blkz_buffer *buffer;
+ struct blkz_buffer *oldbuf;
+ size_t buffer_size;
+ bool should_recover;
+ atomic_t dirty;
+};
+
+struct blkz_context {
+ struct blkz_zone **dbzs; /* dmesg block zones */
+ struct blkz_zone *pbz; /* pmsg block zone */
+ unsigned int dmesg_max_cnt; /* number of zones in @dbzs */
+ unsigned int dmesg_read_cnt; /* index of the next dmesg zone to read */
+ unsigned int pmsg_read_cnt; /* non-zero once the pmsg zone was visited */
+ unsigned int dmesg_write_cnt; /* index of the next dmesg zone to write */
+ /*
+ * Counters are recovered from storage (see blkz_recover_dmesg_meta()),
+ * so they count events since the storage was burned, not since boot.
+ */
+ unsigned int oops_counter;
+ unsigned int panic_counter;
+ unsigned int oom_counter;
+ atomic_t recovery; /* set once blkz_recovery() completed successfully */
+ atomic_t on_panic; /* set when a panic dmesg record is being written */
+
+ /*
+ * bzinfo_lock just protects "bzinfo" during calls to
+ * blkz_register/blkz_unregister
+ */
+ spinlock_t bzinfo_lock;
+ struct blkz_info *bzinfo;
+ struct pstore_info pstore;
+ struct block_device *bdev;
+};
+static struct blkz_context blkz_cxt; /* initialised further down */
+
+enum blkz_flush_mode {
+ FLUSH_NONE = 0, /* write nothing to the device */
+ FLUSH_PART, /* write the given data range, then the header */
+ FLUSH_META, /* write the header only */
+ FLUSH_ALL, /* write the header plus datalen bytes of data */
+};
+
+/* Accessors for the datalen and start fields of @zone's buffer header. */
+static inline int buffer_datalen(struct blkz_zone *zone)
+{
+	return atomic_read(&zone->buffer->datalen);
+}
+
+static inline int buffer_start(struct blkz_zone *zone)
+{
+	return atomic_read(&zone->buffer->start);
+}
+
+/* True once the on_panic flag of the global context has been set. */
+static inline bool is_on_panic(void)
+{
+	return atomic_read(&blkz_cxt.on_panic);
+}
+
+/* Copy at most @len bytes, clamped to the zone end, from @off into @buf. */
+static int blkz_zone_read(struct blkz_zone *zone, char *buf,
+		size_t len, unsigned long off)
+{
+	if (!buf || !zone->buffer || off > zone->buffer_size)
+		return -EINVAL;
+	/* clamp so the copy never runs past the zone payload */
+	len = min_t(size_t, len, zone->buffer_size - off);
+	memcpy(buf, zone->buffer->data + off, len);
+	return 0;
+}
+
+/* Copy @buf into the zone buffer (PART/ALL) and flush as @flush_mode asks. */
+static int blkz_zone_write(struct blkz_zone *zone,
+ enum blkz_flush_mode flush_mode, const char *buf,
+ size_t len, unsigned long off)
+{
+ struct blkz_info *info = blkz_cxt.bzinfo;
+ ssize_t wcnt;
+ ssize_t (*writeop)(const char *buf, size_t bytes, loff_t pos);
+ size_t wlen;
+
+ if (off > zone->buffer_size)
+ return -EINVAL;
+ wlen = min_t(size_t, len, zone->buffer_size - off); /* clamp to zone end */
+ if (flush_mode == FLUSH_PART || flush_mode == FLUSH_ALL) {
+ if (buf && wlen) {
+ memcpy(zone->buffer->data + off, buf, wlen);
+ atomic_set(&zone->buffer->datalen, wlen + off);
+ }
+ }
+ /* once the on_panic flag is set only the panic_write callback is used */
+ writeop = is_on_panic() ? info->panic_write : info->write;
+ if (!writeop)
+ return -EINVAL;
+
+ switch (flush_mode) {
+ case FLUSH_NONE:
+ return 0;
+ case FLUSH_PART:
+ wcnt = writeop((const char *)zone->buffer->data + off, wlen,
+ zone->off + sizeof(*zone->buffer) + off);
+ if (wcnt != wlen)
+ goto set_dirty;
+ case FLUSH_META: /* FLUSH_PART falls through: the header is rewritten too */
+ wlen = sizeof(struct blkz_buffer);
+ wcnt = writeop((const char *)zone->buffer, wlen, zone->off);
+ if (wcnt != wlen)
+ goto set_dirty;
+ break;
+ case FLUSH_ALL:
+ wlen = buffer_datalen(zone) + sizeof(*zone->buffer);
+ wcnt = writeop((const char *)zone->buffer, wlen, zone->off);
+ if (wcnt != wlen)
+ goto set_dirty;
+ break;
+ }
+ return 0;
+set_dirty:
+ pr_err("write failed with %zd returned, set dirty\n", wcnt);
+ atomic_set(&zone->dirty, true); /* retried by blkz_flush_dirty_zone() */
+ return -EBUSY;
+}
+
+/* Retry the full write of a zone marked dirty; a clean zone is a no-op. */
+static int blkz_flush_dirty_zone(struct blkz_zone *zone)
+{
+	int err;
+
+	if (!zone)
+		return -EINVAL;
+	if (!atomic_read(&zone->dirty))
+		return 0;
+	pr_info("try to flush dirty %s zone with offset %lu\n",
+			zone->name, zone->off);
+	err = blkz_zone_write(zone, FLUSH_ALL, NULL, 0, 0);
+	if (err)
+		return err;
+	atomic_set(&zone->dirty, false);
+	return 0;
+}
+
+/* Flush every dirty zone in @zones[0..cnt); stop at the first failure. */
+static int blkz_flush_dirty_zones(struct blkz_zone **zones, unsigned int cnt)
+{
+	unsigned int idx;
+
+	if (!zones)
+		return -EINVAL;
+
+	for (idx = 0; idx < cnt; idx++) {
+		/* a NULL slot is rejected with -EINVAL by the callee */
+		int err = blkz_flush_dirty_zone(zones[idx]);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * blkz_move_zone: move data from an old zone to a new zone
+ *
+ * @old: the zone currently holding the data
+ * @new: the zone that receives the data
+ *
+ * The data is copied and flushed through blkz_zone_write(). When that
+ * fails, @new is reset to "empty and clean" because nothing valid was
+ * stored in it; @old keeps its data. On success @old is emptied.
+ */
+static int blkz_move_zone(struct blkz_zone *old, struct blkz_zone *new)
+{
+	const char *src = (const char *)old->buffer->data;
+	int err = blkz_zone_write(new, FLUSH_ALL, src, buffer_datalen(old), 0);
+
+	if (!err) {
+		atomic_set(&old->buffer->datalen, 0);
+		return 0;
+	}
+	/* nothing usable landed in @new: undo the datalen/dirty updates */
+	atomic_set(&new->buffer->datalen, 0);
+	atomic_set(&new->dirty, false);
+	return err;
+}
+
+/* Relocate dirty zones, then reload every zone flagged should_recover. */
+static int blkz_recover_dmesg_data(struct blkz_context *cxt)
+{
+ struct blkz_info *info = cxt->bzinfo;
+ struct blkz_zone *zone = NULL;
+ struct blkz_buffer *buf;
+ unsigned long i;
+ ssize_t (*readop)(char *buf, size_t bytes, loff_t pos);
+ ssize_t rcnt;
+
+ readop = is_on_panic() ? info->panic_read : info->read;
+ if (!readop)
+ return -EINVAL;
+ for (i = 0; i < cxt->dmesg_max_cnt; i++) {
+ zone = cxt->dbzs[i];
+ if (unlikely(!zone))
+ return -EINVAL;
+ if (atomic_read(&zone->dirty)) { /* an earlier write of it failed */
+ unsigned int wcnt = cxt->dmesg_write_cnt;
+ struct blkz_zone *new = cxt->dbzs[wcnt];
+ int ret;
+
+ ret = blkz_move_zone(zone, new);
+ if (ret) {
+ pr_err("move zone from %lu to %d failed\n",
+ i, wcnt);
+ return ret;
+ }
+ cxt->dmesg_write_cnt = (wcnt + 1) % cxt->dmesg_max_cnt;
+ }
+ if (!zone->should_recover)
+ continue;
+ buf = zone->buffer;
+ rcnt = readop((char *)buf, zone->buffer_size + sizeof(*buf),
+ zone->off); /* header plus the whole payload area */
+ if (rcnt != zone->buffer_size + sizeof(*buf))
+ return (int)rcnt < 0 ? (int)rcnt : -EIO;
+ }
+ return 0;
+}
+
+/**
+ * blkz_recover_dmesg_meta: recover metadata of dmesg
+ *
+ * Reads the header of every dmesg zone and rebuilds from it:
+ * @cxt->dmesg_write_cnt (slot after the newest record),
+ * @cxt->oops_counter, @cxt->panic_counter and @cxt->oom_counter
+ * (highest counter seen for the matching dump reason).
+ * Zones with valid data are flagged should_recover unless on panic.
+ */
+static int blkz_recover_dmesg_meta(struct blkz_context *cxt)
+{
+	struct blkz_info *info = cxt->bzinfo;
+	struct blkz_zone *zone;
+	ssize_t rcnt;
+	size_t len;
+	struct blkz_buffer *buf;
+	struct blkz_dmesg_header *hdr;
+	ssize_t (*readop)(char *buf, size_t bytes, loff_t pos);
+	struct timespec64 time = {0};
+	unsigned int *counter;
+	unsigned long i;
+	/*
+	 * Recover may on panic, we can't allocate any memory by kmalloc.
+	 * So, we use local array instead.
+	 */
+	char buffer_header[sizeof(*buf) + sizeof(*hdr)] = {0};
+
+	readop = is_on_panic() ? info->panic_read : info->read;
+	if (!readop)
+		return -EINVAL;
+
+	len = sizeof(*buf) + sizeof(*hdr);
+	buf = (struct blkz_buffer *)buffer_header;
+	for (i = 0; i < cxt->dmesg_max_cnt; i++) {
+		zone = cxt->dbzs[i];
+		if (unlikely(!zone))
+			return -EINVAL;
+
+		rcnt = readop((char *)buf, len, zone->off);
+		if (rcnt != len)
+			return (int)rcnt < 0 ? (int)rcnt : -EIO;
+
+		/*
+		 * If sig NOT match, it means this zone never used before,
+		 * because we write one by one, and we never modify sig even
+		 * when erase. So, we do not need to check next one.
+		 */
+		if (buf->sig != zone->buffer->sig) {
+			cxt->dmesg_write_cnt = i;
+			pr_debug("no valid data in dmesg zone %lu\n", i);
+			break;
+		}
+
+		if (zone->buffer_size < atomic_read(&buf->datalen)) {
+			pr_info("found overtop zone: %s: id %lu, off %lu, size %zu\n",
+				zone->name, i, zone->off, zone->buffer_size);
+			continue;
+		}
+
+		hdr = (struct blkz_dmesg_header *)buf->data;
+		if (hdr->magic != DMESG_HEADER_MAGIC) {
+			pr_info("found invalid zone: %s: id %lu, off %lu, size %zu\n",
+				zone->name, i, zone->off, zone->buffer_size);
+			continue;
+		}
+
+		/*
+		 * we get the newest zone, and the next one must be the oldest
+		 * or unused zone, because we do write one by one like a circle.
+		 */
+		if (hdr->time.tv_sec >= time.tv_sec) {
+			time.tv_sec = hdr->time.tv_sec;
+			cxt->dmesg_write_cnt = (i + 1) % cxt->dmesg_max_cnt;
+		}
+
+		if (hdr->reason == KMSG_DUMP_OOPS)
+			counter = &cxt->oops_counter;
+		else if (hdr->reason == KMSG_DUMP_OOM)
+			counter = &cxt->oom_counter;
+		else
+			counter = &cxt->panic_counter;
+		*counter = max(*counter, hdr->counter);
+
+		if (!atomic_read(&buf->datalen)) {
+			pr_debug("found erased zone: %s: id %lu, off %lu, size %zu, datalen %d\n",
+				zone->name, i, zone->off, zone->buffer_size,
+				atomic_read(&buf->datalen));
+			continue;
+		}
+
+		if (!is_on_panic())
+			zone->should_recover = true;
+		pr_debug("found nice zone: %s: id %lu, off %lu, size %zu, datalen %d\n",
+				zone->name, i, zone->off,
+				zone->buffer_size, atomic_read(&buf->datalen));
+	}
+	return 0;
+}
+
+/*
+ * Recover the dmesg zones: metadata first, then the payload of every zone
+ * the metadata pass flagged should_recover. Nothing to do without zones.
+ */
+static int blkz_recover_dmesg(struct blkz_context *cxt)
+{
+	int err;
+
+	if (!cxt->dbzs)
+		return 0;
+
+	err = blkz_recover_dmesg_meta(cxt);
+	if (!err)
+		err = blkz_recover_dmesg_data(cxt);
+	if (!err)
+		return 0;
+
+	pr_debug("recover dmesg failed\n");
+	return err;
+}
+
+/* Read the pmsg zone left on storage into zone->oldbuf; skipped on panic. */
+static int blkz_recover_pmsg(struct blkz_context *cxt)
+{
+ struct blkz_info *info = cxt->bzinfo;
+ struct blkz_buffer *oldbuf;
+ struct blkz_zone *zone = NULL;
+ ssize_t (*readop)(char *buf, size_t bytes, loff_t pos);
+ int ret = 0;
+ ssize_t rcnt, len;
+
+ zone = cxt->pbz;
+ if (!zone || zone->oldbuf) /* no pmsg zone, or already recovered */
+ return 0;
+ /* on panic nothing is allocated or read back; only the flush below runs */
+ if (is_on_panic())
+ goto out;
+
+ readop = info->read;
+ if (unlikely(!readop))
+ return -EINVAL;
+
+ len = zone->buffer_size + sizeof(*oldbuf);
+ oldbuf = kzalloc(len, GFP_KERNEL);
+ if (!oldbuf)
+ return -ENOMEM;
+
+ rcnt = readop((char *)oldbuf, len, zone->off);
+ if (rcnt != len) {
+ pr_debug("recover pmsg failed\n");
+ ret = (int)rcnt < 0 ? (int)rcnt : -EIO;
+ goto free_oldbuf;
+ }
+
+ if (oldbuf->sig != zone->buffer->sig) { /* returns 0: ret is still 0 */
+ pr_debug("no valid data in zone %s\n", zone->name);
+ goto free_oldbuf;
+ }
+
+ if (zone->buffer_size < atomic_read(&oldbuf->datalen) ||
+ zone->buffer_size < atomic_read(&oldbuf->start)) {
+ pr_info("found overtop zone: %s: off %lu, size %zu\n",
+ zone->name, zone->off, zone->buffer_size);
+ goto free_oldbuf;
+ }
+
+ if (!atomic_read(&oldbuf->datalen)) {
+ pr_debug("found erased zone: %s: id 0, off %lu, size %zu, datalen %d\n",
+ zone->name, zone->off, zone->buffer_size,
+ atomic_read(&oldbuf->datalen));
+ kfree(oldbuf);
+ goto out;
+ }
+
+ pr_debug("found nice zone: %s: id 0, off %lu, size %zu, datalen %d\n",
+ zone->name, zone->off, zone->buffer_size,
+ atomic_read(&oldbuf->datalen));
+ zone->oldbuf = oldbuf; /* kept until erased via blkz_pstore_erase() */
+out:
+ blkz_flush_dirty_zone(zone);
+ return 0;
+free_oldbuf:
+ kfree(oldbuf);
+ return ret;
+}
+
+/*
+ * Run dmesg and pmsg recovery; once a pass succeeded completely the
+ * recovery flag is set and later calls return 0 straight away.
+ */
+static inline int blkz_recovery(struct blkz_context *cxt)
+{
+	int err;
+
+	if (atomic_read(&cxt->recovery))
+		return 0;
+
+	err = blkz_recover_dmesg(cxt);
+	if (!err)
+		err = blkz_recover_pmsg(cxt);
+	if (err) {
+		pr_debug("recovery failed, handle buffer\n");
+		return err;
+	}
+
+	atomic_set(&cxt->recovery, 1);
+	pr_debug("recover end!\n");
+	return 0;
+}
+
+/* pstore open callback: rewind the dmesg and pmsg read cursors. */
+static int blkz_pstore_open(struct pstore_info *psi)
+{
+	struct blkz_context *cxt = psi->data;
+
+	cxt->dmesg_read_cnt = 0;
+	cxt->pmsg_read_cnt = 0;
+	return 0;
+}
+
+/* True if @zone holds a recovered (old) buffer with data in it. */
+static inline bool blkz_old_ok(struct blkz_zone *zone)
+{
+	return zone && zone->oldbuf && atomic_read(&zone->oldbuf->datalen);
+}
+
+/* True if @zone has a live buffer with data in it. */
+static inline bool blkz_ok(struct blkz_zone *zone)
+{
+	return zone && zone->buffer && buffer_datalen(zone);
+}
+
+/* pstore erase callback: drop a dmesg record or the recovered pmsg data. */
+static int blkz_pstore_erase(struct pstore_record *record)
+{
+	struct blkz_context *cxt = record->psi->data;
+	struct blkz_zone *zone;
+
+	switch (record->type) {
+	case PSTORE_TYPE_DMESG:
+		zone = cxt->dbzs[record->id];
+		if (unlikely(!blkz_ok(zone)))
+			return 0;
+		atomic_set(&zone->buffer->datalen, 0);
+		return blkz_zone_write(zone, FLUSH_META, NULL, 0, 0);
+	case PSTORE_TYPE_PMSG:
+		zone = cxt->pbz;
+		if (unlikely(!blkz_old_ok(zone)))
+			return 0;
+		kfree(zone->oldbuf);
+		zone->oldbuf = NULL;
+		/*
+		 * New data in the live buffer already invalidated the old
+		 * record, so only an empty buffer needs the erase write.
+		 */
+		if (!buffer_datalen(zone))
+			return blkz_zone_write(zone, FLUSH_META, NULL, 0, 0);
+		blkz_flush_dirty_zone(zone);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/* Stamp the dmesg header (with a per-reason counter) into @zone's buffer. */
+static void blkz_write_kmsg_hdr(struct blkz_zone *zone,
+		struct pstore_record *record)
+{
+	struct blkz_context *cxt = record->psi->data;
+	struct blkz_dmesg_header *hdr;
+
+	hdr = (struct blkz_dmesg_header *)zone->buffer->data;
+	hdr->magic = DMESG_HEADER_MAGIC;
+	hdr->compressed = record->compressed;
+	hdr->time.tv_sec = record->time.tv_sec;
+	hdr->time.tv_nsec = record->time.tv_nsec;
+	hdr->reason = record->reason;
+	if (hdr->reason == KMSG_DUMP_OOPS)
+		hdr->counter = ++cxt->oops_counter;
+	else if (hdr->reason == KMSG_DUMP_PANIC)
+		hdr->counter = ++cxt->panic_counter;
+	else if (hdr->reason == KMSG_DUMP_OOM)
+		hdr->counter = ++cxt->oom_counter;
+	else
+		hdr->counter = 0;
+}
+
+/* Store a crash dump @record in the next dmesg zone (first part only). */
+static int notrace blkz_dmesg_write(struct blkz_context *cxt,
+ struct pstore_record *record)
+{
+ struct blkz_info *info = cxt->bzinfo;
+ struct blkz_zone *zone;
+ size_t size, hlen;
+ int ret;
+
+ /*
+ * Out of the various dmesg dump types, pstore/blk is currently designed
+ * to only store crash logs, rather than storing general kernel logs.
+ */
+ if (record->reason != KMSG_DUMP_OOPS &&
+ record->reason != KMSG_DUMP_PANIC &&
+ record->reason != KMSG_DUMP_OOM)
+ return -EINVAL;
+
+ /* Skip Oopses when configured to do so. */
+ if (record->reason == KMSG_DUMP_OOPS && !info->dump_oops)
+ return -EINVAL;
+
+ /* Skip OOM when configured to do so. */
+ if (record->reason == KMSG_DUMP_OOM && !info->dump_oom)
+ return -EINVAL;
+
+ /*
+ * Explicitly only take the first part of any new crash.
+ * If our buffer is larger than kmsg_bytes, this can never happen,
+ * and if our buffer is smaller than kmsg_bytes, we don't want the
+ * report split across multiple records.
+ */
+ if (record->part != 1)
+ return -ENOSPC;
+
+ if (!cxt->dbzs)
+ return -ENOSPC;
+
+ zone = cxt->dbzs[cxt->dmesg_write_cnt];
+ if (!zone)
+ return -ENOSPC;
+
+ blkz_write_kmsg_hdr(zone, record);
+ hlen = sizeof(struct blkz_dmesg_header);
+ size = record->size;
+ if (size + hlen > zone->buffer_size) /* truncate to what fits */
+ size = zone->buffer_size - hlen;
+ ret = blkz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen);
+ pr_debug("write %s to zone id %d\n", zone->name, cxt->dmesg_write_cnt);
+ cxt->dmesg_write_cnt = (cxt->dmesg_write_cnt + 1) % cxt->dmesg_max_cnt;
+
+ if (!ret) {
+ pr_debug("try to flush dmesg dirty zones\n");
+ blkz_flush_dirty_zones(cxt->dbzs, cxt->dmesg_max_cnt);
+ }
+ return 0; /* the result of the zone write is not propagated */
+}
+
+/* Append @record to the pmsg zone, wrapping to its start when it is full. */
+static int notrace blkz_pmsg_write(struct blkz_context *cxt,
+ struct pstore_record *record)
+{
+ struct blkz_zone *zone;
+ size_t start, rem;
+ int cnt = record->size;
+ bool is_full_data = false;
+ char *buf = record->buf;
+
+ zone = cxt->pbz;
+ if (!zone)
+ return -ENOSPC;
+
+ if (atomic_read(&zone->buffer->datalen) >= zone->buffer_size)
+ is_full_data = true;
+
+ if (unlikely(cnt > zone->buffer_size)) { /* keep only the last bytes */
+ buf += cnt - zone->buffer_size;
+ cnt = zone->buffer_size;
+ }
+
+ start = buffer_start(zone);
+ rem = zone->buffer_size - start;
+ if (unlikely(rem < cnt)) { /* fill the tail first, then wrap to offset 0 */
+ blkz_zone_write(zone, FLUSH_PART, buf, rem, start);
+ buf += rem;
+ cnt -= rem;
+ start = 0;
+ is_full_data = true;
+ }
+
+ atomic_set(&zone->buffer->start, cnt + start);
+ blkz_zone_write(zone, FLUSH_PART, buf, cnt, start);
+ /*
+ * blkz_zone_write() sets datalen to off + len of the last write.
+ * That is right while the data has never wrapped around. After a
+ * wrap the writes restart at the beginning of the zone, so datalen
+ * would shrink although the whole zone still holds valid data.
+ * Once the zone has been filled, pin datalen to the buffer size and
+ * flush the header again.
+ */
+ if (is_full_data) {
+ atomic_set(&zone->buffer->datalen, zone->buffer_size);
+ blkz_zone_write(zone, FLUSH_META, NULL, 0, 0);
+ }
+ return 0;
+}
+
+/* pstore write callback: attempt recovery, then dispatch on record type. */
+static int notrace blkz_pstore_write(struct pstore_record *record)
+{
+	struct blkz_context *cxt = record->psi->data;
+	bool panic_dump = record->type == PSTORE_TYPE_DMESG &&
+			  record->reason == KMSG_DUMP_PANIC;
+
+	/* a panic dump sets the on_panic flag read by is_on_panic() */
+	if (panic_dump)
+		atomic_set(&cxt->on_panic, 1);
+
+	/*
+	 * Recovery has to be attempted before the first write; its return
+	 * value is not checked here.
+	 */
+	blkz_recovery(cxt);
+
+	if (record->type == PSTORE_TYPE_DMESG)
+		return blkz_dmesg_write(cxt, record);
+	if (record->type == PSTORE_TYPE_PMSG)
+		return blkz_pmsg_write(cxt, record);
+	return -EINVAL;
+}
+
+#define READ_NEXT_ZONE ((ssize_t)(-1024))
+/*
+ * Return the next dmesg zone that holds data, then the pmsg zone if it
+ * has recovered data, and NULL once nothing is left to read.
+ */
+static struct blkz_zone *blkz_read_next_zone(struct blkz_context *cxt)
+{
+	struct blkz_zone *cur;
+
+	while (cxt->dmesg_read_cnt < cxt->dmesg_max_cnt) {
+		cur = cxt->dbzs[cxt->dmesg_read_cnt++];
+		if (blkz_ok(cur))
+			return cur;
+	}
+
+	/* the pmsg zone is only offered while pmsg_read_cnt is still zero */
+	if (cxt->pmsg_read_cnt != 0)
+		return NULL;
+	cxt->pmsg_read_cnt++;
+	return blkz_old_ok(cxt->pbz) ? cxt->pbz : NULL;
+}
+
+/* Copy the dmesg header fields of @zone into @record; -EINVAL if no magic. */
+static int blkz_read_dmesg_hdr(struct blkz_zone *zone,
+		struct pstore_record *record)
+{
+	const struct blkz_dmesg_header *hdr;
+
+	hdr = (const struct blkz_dmesg_header *)zone->buffer->data;
+	if (hdr->magic != DMESG_HEADER_MAGIC)
+		return -EINVAL;
+	record->compressed = hdr->compressed;
+	record->time.tv_sec = hdr->time.tv_sec;
+	record->time.tv_nsec = hdr->time.tv_nsec;
+	record->reason = hdr->reason;
+	record->count = hdr->counter;
+	return 0;
+}
+
+/* Build the pstore record for a dmesg zone; READ_NEXT_ZONE skips the zone. */
+static ssize_t blkz_dmesg_read(struct blkz_zone *zone,
+		struct pstore_record *record)
+{
+	const size_t hdr_len = sizeof(struct blkz_dmesg_header);
+	size_t size = buffer_datalen(zone), hlen = 0;
+
+	/* Clear and skip this DMESG record if it has no valid header */
+	if (size < hdr_len || blkz_read_dmesg_hdr(zone, record)) {
+		atomic_set(&zone->buffer->datalen, 0);
+		atomic_set(&zone->dirty, 0);
+		return READ_NEXT_ZONE;
+	}
+	size -= hdr_len;
+	if (!record->compressed) {
+		const char *what = record->reason == KMSG_DUMP_OOPS ? "Oops" :
+			record->reason == KMSG_DUMP_PANIC ? "Panic" :
+			record->reason == KMSG_DUMP_OOM ? "OOM" : "Unknown";
+		char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n",
+				what, record->count);
+
+		if (!buf)	/* strlen(NULL) below would oops */
+			return -ENOMEM;
+		hlen = strlen(buf);
+		record->buf = krealloc(buf, hlen + size, GFP_KERNEL);
+		if (!record->buf) {
+			kfree(buf);
+			return -ENOMEM;
+		}
+	} else {
+		record->buf = kmalloc(size, GFP_KERNEL);
+		if (!record->buf)
+			return -ENOMEM;
+	}
+	if (blkz_zone_read(zone, record->buf + hlen, size, hdr_len) < 0) {
+		kfree(record->buf);
+		record->buf = NULL;	/* do not leave a dangling pointer */
+		return READ_NEXT_ZONE;
+	}
+	return size + hlen;
+}
+
+/* Linearize the recovered pmsg data (bytes from @start first) into @record. */
+static ssize_t blkz_pmsg_read(struct blkz_zone *zone,
+		struct pstore_record *record)
+{
+	struct blkz_buffer *old = zone->oldbuf;
+	size_t size, start;
+
+	if (!old)
+		return READ_NEXT_ZONE;
+	size = atomic_read(&old->datalen);
+	start = atomic_read(&old->start);
+	/* a header with start > datalen would underflow size - start below */
+	if (start > size)
+		return READ_NEXT_ZONE;
+
+	record->buf = kmalloc(size, GFP_KERNEL);
+	if (!record->buf)
+		return -ENOMEM;
+	memcpy(record->buf, old->data + start, size - start);
+	memcpy(record->buf + size - start, old->data, start);
+	return size;
+}
+
+/*
+ * pstore read callback: hand out the next stored record.
+ *
+ * Returns the record size, 0 once no zone is left, or a negative errno.
+ */
+static ssize_t blkz_pstore_read(struct pstore_record *record)
+{
+	struct blkz_context *cxt = record->psi->data;
+	struct blkz_zone *zone;
+	ssize_t len;
+
+	/*
+	 * Recovery has to be attempted before the first read; its return
+	 * value is not checked here.
+	 */
+	blkz_recovery(cxt);
+
+	for (;;) {
+		zone = blkz_read_next_zone(cxt);
+		if (!zone)
+			return 0;
+
+		record->type = zone->type;
+		if (record->type == PSTORE_TYPE_DMESG) {
+			record->id = cxt->dmesg_read_cnt - 1;
+			len = blkz_dmesg_read(zone, record);
+		} else if (record->type == PSTORE_TYPE_PMSG) {
+			len = blkz_pmsg_read(zone, record);
+		} else {
+			continue;
+		}
+
+		/* READ_NEXT_ZONE asks for this zone to be skipped */
+		if (len != READ_NEXT_ZONE)
+			return len;
+	}
+}
+
+static struct blkz_context blkz_cxt = {
+ .bzinfo_lock = __SPIN_LOCK_UNLOCKED(blkz_cxt.bzinfo_lock),
+ .recovery = ATOMIC_INIT(0), /* nothing recovered yet */
+ .on_panic = ATOMIC_INIT(0), /* set by blkz_pstore_write() on a panic dump */
+ .pstore = { /* handed to pstore_register() by blkz_register() */
+ .owner = THIS_MODULE,
+ .name = MODNAME,
+ .open = blkz_pstore_open,
+ .read = blkz_pstore_read,
+ .write = blkz_pstore_write,
+ .erase = blkz_pstore_erase,
+ },
+};
+
+/* Size in bytes of the opened partition, or -EINVAL without a device. */
+static long long blkz_blkdev_size(struct blkz_context *cxt)
+{
+	if (!cxt->bdev)
+		return -EINVAL;
+
+	/* sector count of the partition times the sector size */
+	return (long long)part_nr_sects_read(cxt->bdev->bd_part) * SECTOR_SIZE;
+}
+
+/**
+ * blkz_get_bdev: look up and open the block device named by @dev
+ *
+ * It uses name_to_dev_t to get dev_t, so it accepts the following variants:
+ * 1) <hex_major><hex_minor> device number in hexadecimal represents itself
+ * no leading 0x, for example b302.
+ * 2) /dev/<disk_name> represents the device number of disk
+ * 3) /dev/<disk_name><decimal> represents the device number
+ * of partition - device number of disk plus the partition number
+ * 4) /dev/<disk_name>p<decimal> - same as the above, that form is
+ * used when the name of a partitioned disk ends in a digit.
+ * 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ * unique id of a partition if the partition table provides it.
+ * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ * filled hex representation of the 32-bit "NT disk signature", and PP
+ * is a zero-filled hex representation of the 1-based partition number.
+ * 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ * a partition with a known unique id.
+ * 7) <major>:<minor> major and minor number of the device separated by
+ * a colon.
+ */
+static int blkz_get_bdev(struct blkz_context *cxt, const char *dev)
+{
+ dev_t devt;
+ struct block_device *bdev;
+ const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
+
+ if (!dev)
+ return -EINVAL;
+
+ devt = name_to_dev_t(dev);
+ if (!devt) {
+ pr_err("not found dev_t from %s\n", dev);
+ return -ENODEV;
+ }
+
+ bdev = blkdev_get_by_dev(devt, mode, cxt); /* @cxt is passed as holder */
+ if (IS_ERR(bdev)) {
+ pr_err("get block_device by %u:%u failed\n",
+ MAJOR(devt), MINOR(devt));
+ return PTR_ERR(bdev);
+ }
+
+ cxt->bdev = bdev; /* released again by blkz_put_bdev() */
+ return 0;
+}
+
+/* Release the block device stored in @cxt->bdev, if there is one. */
+static void blkz_put_bdev(struct blkz_context *cxt)
+{
+	if (!cxt->bdev)
+		return;
+
+	blkdev_put(cxt->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+	cxt->bdev = NULL;
+}
+
+/* Default read callback: read @bytes at @pos through the bdev page cache. */
+static ssize_t blkz_default_general_read(char *buf, size_t bytes, loff_t pos)
+{
+	ssize_t ret;
+	struct blkz_context *cxt = &blkz_cxt;
+	struct file filp = { };	/* zeroed: no stack garbage reaches the VFS */
+	struct kiocb kiocb;
+	struct iov_iter iter;
+	struct iovec iov = {
+		.iov_base = (void __user *)buf,
+		.iov_len = bytes
+	};
+
+	if (!cxt->bdev) {
+		struct blkz_info *info = cxt->bzinfo;
+
+		if (!info->blkdev)
+			return -ENODEV;
+		ret = blkz_get_bdev(cxt, info->blkdev);
+		if (ret)
+			return ret;
+	}
+
+	filp.f_mapping = cxt->bdev->bd_inode->i_mapping;
+	filp.f_flags = O_DSYNC | __O_SYNC | O_NOATIME;
+	init_sync_kiocb(&kiocb, &filp);
+	kiocb.ki_pos = pos;
+	iov_iter_init(&iter, READ, &iov, 1, bytes);
+
+	return generic_file_read_iter(&kiocb, &iter);
+}
+
+/* Default write callback: write @bytes at @pos and sync the bdev mapping. */
+static ssize_t blkz_default_general_write(const char *buf, size_t bytes,
+		loff_t pos)
+{
+	struct blkz_context *cxt = &blkz_cxt;
+	struct iov_iter iter;
+	struct file filp = { };	/* zeroed: no stack garbage reaches the VFS */
+	ssize_t ret, written;
+	struct iovec iov = {
+		.iov_base = (void __user *)buf,
+		.iov_len = bytes
+	};
+	struct writeback_control wbc = {
+		.nr_to_write = LONG_MAX,
+		.sync_mode = WB_SYNC_ALL,
+		.range_start = 0,
+		.range_end = LONG_MAX,
+	};
+
+	if (!cxt->bdev) {
+		struct blkz_info *info = cxt->bzinfo;
+
+		if (!info->blkdev)
+			return -ENODEV;
+		ret = blkz_get_bdev(cxt, info->blkdev);
+		if (ret)
+			return ret;
+	}
+
+	filp.f_mapping = cxt->bdev->bd_inode->i_mapping;
+	iov_iter_init(&iter, WRITE, &iov, 1, bytes);
+	written = generic_perform_write(&filp, &iter, pos);
+	ret = generic_writepages(filp.f_mapping, &wbc);
+
+	return ret < 0 ? ret : written;
+}
+
+/* Allocate one zone of @size bytes at *off and advance *off past it. */
+static struct blkz_zone *blkz_init_zone(enum pstore_type_id type,
+ unsigned long *off, size_t size)
+{
+ struct blkz_info *info = blkz_cxt.bzinfo;
+ struct blkz_zone *zone;
+ const char *name = pstore_type_to_name(type);
+
+ if (!size) /* a zero size yields NULL, not an ERR_PTR */
+ return NULL;
+
+ if (*off + size > info->total_size) {
+ pr_err("no room for %s (0x%zx@0x%lx over 0x%lx)\n",
+ name, size, *off, info->total_size);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ zone = kzalloc(sizeof(struct blkz_zone), GFP_KERNEL);
+ if (!zone)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * NOTE: allocate buffer for blk zones for two reasons:
+ * 1. It can temporarily hold the data before
+ * blkz_default_general_read/write are usable.
+ * 2. It makes pstore usable even if no persistent storage. Most
+ * events of pstore except panic are suitable!!
+ */
+ zone->buffer = kmalloc(size, GFP_KERNEL);
+ if (!zone->buffer) {
+ kfree(zone);
+ return ERR_PTR(-ENOMEM);
+ }
+ memset(zone->buffer, 0xFF, size);
+ zone->off = *off;
+ zone->name = name;
+ zone->type = type;
+ zone->buffer_size = size - sizeof(struct blkz_buffer); /* payload only */
+ zone->buffer->sig = type ^ BLK_SIG;
+ zone->oldbuf = NULL;
+ atomic_set(&zone->dirty, 0);
+ atomic_set(&zone->buffer->datalen, 0);
+ atomic_set(&zone->buffer->start, 0);
+
+ *off += size;
+ pr_debug("blkzone %s: off 0x%lx, %zu header, %zu data\n", zone->name,
+ zone->off, sizeof(*zone->buffer), zone->buffer_size);
+ return zone;
+}
+
+/* Allocate total_size / record_size zones of @type; count goes to *cnt. */
+static struct blkz_zone **blkz_init_zones(enum pstore_type_id type,
+		unsigned long *off, size_t total_size, ssize_t record_size,
+		unsigned int *cnt)
+{
+	struct blkz_info *info = blkz_cxt.bzinfo;
+	struct blkz_zone **zones, *zone;
+	const char *name = pstore_type_to_name(type);
+	int c, i;
+
+	if (!total_size || !record_size)
+		return NULL;
+
+	if (*off + total_size > info->total_size) {
+		pr_err("no room for zones %s (0x%zx@0x%lx over 0x%lx)\n",
+			name, total_size, *off, info->total_size);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* kcalloc(0) yields ZERO_SIZE_PTR, which callers would index */
+	c = total_size / record_size;
+	zones = c ? kcalloc(c, sizeof(*zones), GFP_KERNEL) : NULL;
+	if (!zones) {
+		pr_err("allocate for zones %s failed\n", name);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0; i < c; i++) {
+		zone = blkz_init_zone(type, off, record_size);
+		if (IS_ERR_OR_NULL(zone)) {
+			pr_err("initialize zones %s failed\n", name);
+			while (--i >= 0) {
+				kfree(zones[i]->buffer);
+				kfree(zones[i]);
+			}
+			kfree(zones);
+			return (void *)zone;
+		}
+		zones[i] = zone;
+	}
+	*cnt = c;
+	return zones;
+}
+
+static void blkz_free_zone(struct blkz_zone **blkzone)
+{
+	struct blkz_zone *zone = *blkzone;
+
+	if (!zone)
+		return;
+	kfree(zone->oldbuf);	/* recovered pmsg data would leak otherwise */
+	kfree(zone->buffer);
+	kfree(zone);
+	*blkzone = NULL;
+}
+
+/* Free all @*cnt zones of @*blkzones and the pointer array itself. */
+static void blkz_free_zones(struct blkz_zone ***blkzones, unsigned int *cnt)
+{
+	struct blkz_zone **zones = *blkzones;
+
+	if (!zones)
+		return;
+
+	/* valid slots are [0, *cnt): step down before indexing */
+	while (*cnt > 0)
+		blkz_free_zone(&zones[--(*cnt)]);
+	kfree(zones);
+	*blkzones = NULL;
+}
+
+/* Carve the dmesg zones and the pmsg zone out of the configured space. */
+static int blkz_cut_zones(struct blkz_context *cxt)
+{
+	struct blkz_info *info = cxt->bzinfo;
+	unsigned long off = 0;
+	int err;
+
+	/* everything that is not reserved for pmsg goes to dmesg records */
+	cxt->dbzs = blkz_init_zones(PSTORE_TYPE_DMESG, &off,
+			info->total_size - info->pmsg_size,
+			info->dmesg_size, &cxt->dmesg_max_cnt);
+	if (IS_ERR(cxt->dbzs)) {
+		err = PTR_ERR(cxt->dbzs);
+		/* never leave an ERR_PTR behind for later users */
+		cxt->dbzs = NULL;
+		return err;
+	}
+
+	cxt->pbz = blkz_init_zone(PSTORE_TYPE_PMSG, &off, info->pmsg_size);
+	if (IS_ERR(cxt->pbz)) {
+		err = PTR_ERR(cxt->pbz);
+		cxt->pbz = NULL;
+		blkz_free_zones(&cxt->dbzs, &cxt->dmesg_max_cnt);
+		return err;
+	}
+
+	return 0;
+}
+
+/* Validate @info, cut the zones and register with pstore; 0 or -errno. */
+int blkz_register(struct blkz_info *info)
+{
+	int err = -EINVAL;
+	struct blkz_context *cxt = &blkz_cxt;
+	struct module *owner = info->owner;
+
+	if (info->blkdev && !blkz_get_bdev(cxt, info->blkdev)) {
+		long long size = blkz_blkdev_size(cxt);
+
+		if (size > 0 && (!info->total_size || info->total_size > size)) {
+			info->total_size = (unsigned long)size;
+			pr_info("total size %ld from block device %s\n",
+					info->total_size, info->blkdev);
+		}
+		pr_info("using block device %s\n", info->blkdev);
+	}
+
+	if (!info->total_size || (!info->dmesg_size && !info->pmsg_size)) {
+		pr_warn("The total size and the dmesg size must be non-zero\n");
+		return -EINVAL;
+	}
+	if (info->total_size < 4096) {
+		pr_err("total size must be over 4096 bytes\n");
+		return -EINVAL;
+	}
+
+#define check_size(name, size) {					\
+		if (info->name & (size - 1)) {				\
+			pr_err(#name " must be a multiple of %d\n",	\
+					(size));			\
+			return -EINVAL;					\
+		}							\
+	}
+	check_size(total_size, 4096);
+	check_size(dmesg_size, SECTOR_SIZE);
+	check_size(pmsg_size, SECTOR_SIZE);
+#undef check_size
+	if (!info->read)
+		info->read = blkz_default_general_read;
+	if (!info->write)
+		info->write = blkz_default_general_write;
+	if (owner && !try_module_get(owner))
+		return -EINVAL;
+
+	spin_lock(&cxt->bzinfo_lock);
+	if (cxt->bzinfo) {
+		pr_warn("blk '%s' already loaded: ignoring '%s'\n",
+				cxt->bzinfo->name, info->name);
+		spin_unlock(&cxt->bzinfo_lock);
+		module_put(owner);	/* drop the reference taken above */
+		return -EBUSY;
+	}
+	cxt->bzinfo = info;
+	spin_unlock(&cxt->bzinfo_lock);
+
+	pr_debug("register %s with properties:\n", info->name);
+	pr_debug("\tblkdev : %s\n", info->blkdev);
+	pr_debug("\ttotal size : %ld Bytes\n", info->total_size);
+	pr_debug("\tdmesg size : %ld Bytes\n", info->dmesg_size);
+	pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size);
+	err = blkz_cut_zones(cxt);
+	if (err) {
+		pr_err("cut zones failed\n");
+		goto fail_out;
+	}
+
+	if (info->dmesg_size) {
+		cxt->pstore.bufsize = cxt->dbzs[0]->buffer_size -
+					sizeof(struct blkz_dmesg_header);
+		cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL);
+		if (!cxt->pstore.buf) {
+			err = -ENOMEM;
+			goto free_zones;
+		}
+		cxt->pstore.flags |= PSTORE_FLAGS_DMESG;
+	}
+	cxt->pstore.data = cxt;
+	if (info->pmsg_size)
+		cxt->pstore.flags |= PSTORE_FLAGS_PMSG;
+	pr_info("Registered %s as blkzone backend for %s%s%s%s\n", info->name,
+			cxt->dbzs && cxt->bzinfo->dump_oops ? "Oops " : "",
+			cxt->dbzs && cxt->bzinfo->panic_write ? "Panic " : "",
+			cxt->dbzs && cxt->bzinfo->dump_oom ? "OOM " : "",
+			cxt->pbz ? "Pmsg" : "");
+	err = pstore_register(&cxt->pstore);
+	if (err) {
+		pr_err("registering with pstore failed\n");
+		goto free_pstore_buf;
+	}
+	module_put(owner);
+	return 0;
+
+free_pstore_buf:
+	kfree(cxt->pstore.buf);
+	cxt->pstore.buf = NULL;
+free_zones:
+	while (cxt->dbzs && cxt->dmesg_max_cnt)
+		blkz_free_zone(&cxt->dbzs[--cxt->dmesg_max_cnt]);
+	kfree(cxt->dbzs);
+	cxt->dbzs = NULL;
+	blkz_free_zone(&cxt->pbz);
+fail_out:
+	spin_lock(&blkz_cxt.bzinfo_lock);
+	blkz_cxt.bzinfo = NULL;
+	spin_unlock(&blkz_cxt.bzinfo_lock);
+	module_put(owner);
+	return err;
+}
+EXPORT_SYMBOL_GPL(blkz_register);
+
+void blkz_unregister(struct blkz_info *info)
+{
+	struct blkz_context *cxt = &blkz_cxt;
+
+	pstore_unregister(&cxt->pstore);
+	kfree(cxt->pstore.buf);
+	cxt->pstore.bufsize = 0;
+	spin_lock(&cxt->bzinfo_lock);
+	blkz_cxt.bzinfo = NULL;
+	spin_unlock(&cxt->bzinfo_lock);
+	blkz_free_zones(&cxt->dbzs, &cxt->dmesg_max_cnt);
+	blkz_free_zone(&cxt->pbz);
+	blkz_put_bdev(cxt);
+	cxt->dmesg_write_cnt = 0;
+	atomic_set(&cxt->recovery, 0);	/* next registration recovers anew */
+}
+EXPORT_SYMBOL_GPL(blkz_unregister);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("liaoweixiong <liaoweixiong@allwinnertech.com>");
+MODULE_DESCRIPTION("Block device Oops/Panic logger");