Commit 85a79128 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ubifs-for-linus-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs

Pull UBI and UBIFS updates from Richard Weinberger:
 "UBI:
   - Add Zhihao Cheng as reviewer
   - Attach via device tree
   - Add NVMEM layer
   - Various fastmap related fixes

  UBIFS:
   - Add Zhihao Cheng as reviewer
   - Convert to folios
   - Various fixes (memory leaks in error paths, function prototypes)"

* tag 'ubifs-for-linus-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs: (34 commits)
  mtd: ubi: fix NVMEM over UBI volumes on 32-bit systems
  mtd: ubi: provide NVMEM layer over UBI volumes
  mtd: ubi: populate ubi volume fwnode
  mtd: ubi: introduce pre-removal notification for UBI volumes
  mtd: ubi: attach from device tree
  mtd: ubi: block: use notifier to create ubiblock from parameter
  dt-bindings: mtd: ubi-volume: allow UBI volumes to provide NVMEM
  dt-bindings: mtd: add basic bindings for UBI
  ubifs: Queue up space reservation tasks if retrying many times
  ubifs: ubifs_symlink: Fix memleak of inode->i_link in error path
  ubifs: dbg_check_idx_size: Fix kmemleak if loading znode failed
  ubi: Correct the number of PEBs after a volume resize failure
  ubi: fix slab-out-of-bounds in ubi_eba_get_ldesc+0xfb/0x130
  ubi: correct the calculation of fastmap size
  ubifs: Remove unreachable code in dbg_check_ltab_lnum
  ubifs: fix function pointer cast warnings
  ubifs: fix sort function prototype
  ubi: Check for too small LEB size in VTBL code
  MAINTAINERS: Add Zhihao Cheng as UBI/UBIFS reviewer
  ubifs: Convert populate_page() to take a folio
  ...
parents cba9ffdb b8a77b9a
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/mtd/partitions/linux,ubi.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Unsorted Block Images
description: |
UBI ("Unsorted Block Images") is a volume management system for raw
flash devices which manages multiple logical volumes on a single
physical flash device and spreads the I/O load (i.e. wear-leveling)
across the whole flash chip.
maintainers:
- Daniel Golle <daniel@makrotopia.org>
allOf:
- $ref: partition.yaml#
properties:
compatible:
const: linux,ubi
volumes:
type: object
description: UBI Volumes
patternProperties:
"^ubi-volume-.*$":
$ref: /schemas/mtd/partitions/ubi-volume.yaml#
unevaluatedProperties: false
required:
- compatible
unevaluatedProperties: false
examples:
- |
partitions {
compatible = "fixed-partitions";
#address-cells = <1>;
#size-cells = <1>;
partition@0 {
reg = <0x0 0x100000>;
label = "bootloader";
read-only;
};
partition@100000 {
reg = <0x100000 0x1ff00000>;
label = "ubi";
compatible = "linux,ubi";
volumes {
ubi-volume-caldata {
volid = <2>;
volname = "rf";
nvmem-layout {
compatible = "fixed-layout";
#address-cells = <1>;
#size-cells = <1>;
eeprom@0 {
reg = <0x0 0x1000>;
};
};
};
};
};
};
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/mtd/partitions/ubi-volume.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: UBI volume
description: |
This binding describes a single UBI volume. Volumes can be matched either
by their ID or their name, or both.
maintainers:
- Daniel Golle <daniel@makrotopia.org>
properties:
volid:
$ref: /schemas/types.yaml#/definitions/uint32
description:
Match UBI volume ID
volname:
$ref: /schemas/types.yaml#/definitions/string
description:
Match UBI volume name
nvmem-layout:
$ref: /schemas/nvmem/layouts/nvmem-layout.yaml#
description:
This container may reference an NVMEM layout parser.
anyOf:
- required:
- volid
- required:
- volname
# This is a generic file other bindings inherit from and extend
additionalProperties: true
......@@ -3,3 +3,13 @@
==========
Page Cache
==========
The page cache is the primary way that the user and the rest of the kernel
interact with filesystems. It can be bypassed (e.g. with O_DIRECT),
but normal reads, writes and mmaps go through the page cache.
Folios
======
The folio is the unit of memory management within the page cache.
Operations
......@@ -22573,6 +22573,7 @@ F: include/uapi/misc/uacce/
UBI FILE SYSTEM (UBIFS)
M: Richard Weinberger <richard@nod.at>
R: Zhihao Cheng <chengzhihao1@huawei.com>
L: linux-mtd@lists.infradead.org
S: Supported
W: http://www.linux-mtd.infradead.org/doc/ubifs.html
......@@ -22718,6 +22719,7 @@ F: drivers/ufs/host/ufs-renesas.c
UNSORTED BLOCK IMAGES (UBI)
M: Richard Weinberger <richard@nod.at>
R: Zhihao Cheng <chengzhihao1@huawei.com>
L: linux-mtd@lists.infradead.org
S: Supported
W: http://www.linux-mtd.infradead.org/
......
......@@ -113,4 +113,17 @@ config MTD_UBI_FAULT_INJECTION
testing purposes.
If in doubt, say "N".
config MTD_UBI_NVMEM
tristate "UBI virtual NVMEM"
default n
depends on NVMEM
help
This option enables an additional driver exposing UBI volumes as NVMEM
providers, intended for platforms where UBI is part of the firmware
specification and used to store also e.g. MAC addresses or board-
specific Wi-Fi calibration data.
If in doubt, say "N".
endif # MTD_UBI
......@@ -7,3 +7,4 @@ ubi-$(CONFIG_MTD_UBI_FASTMAP) += fastmap.o
ubi-$(CONFIG_MTD_UBI_BLOCK) += block.o
obj-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o
obj-$(CONFIG_MTD_UBI_NVMEM) += nvmem.o
......@@ -65,10 +65,10 @@ struct ubiblock_pdu {
};
/* Numbers of elements set in the @ubiblock_param array */
static int ubiblock_devs __initdata;
static int ubiblock_devs;
/* MTD devices specification parameters */
static struct ubiblock_param ubiblock_param[UBIBLOCK_MAX_DEVICES] __initdata;
static struct ubiblock_param ubiblock_param[UBIBLOCK_MAX_DEVICES];
struct ubiblock {
struct ubi_volume_desc *desc;
......@@ -536,6 +536,70 @@ static int ubiblock_resize(struct ubi_volume_info *vi)
return 0;
}
/*
 * Check whether the volume described by @vi is the one selected by a
 * (name, ubi_num, vol_id) triple:
 *  - ubi_num == -1: @name is a volume device node path, resolved through
 *    ubi_get_num_by_path();
 *  - vol_id == -1: @name is the volume name on UBI device @ubi_num;
 *  - otherwise the volume is selected by @ubi_num and @vol_id alone.
 *
 * Returns true on a match, false otherwise (including lookup failures).
 */
static bool
match_volume_desc(struct ubi_volume_info *vi, const char *name, int ubi_num, int vol_id)
{
	int path_ubi_num, path_vol_id, name_len;

	if (ubi_num == -1) {
		/* No ubi num, name must be a vol device path */
		if (ubi_get_num_by_path(name, &path_ubi_num, &path_vol_id))
			return false;

		return vi->ubi_num == path_ubi_num && vi->vol_id == path_vol_id;
	}

	/* Both remaining forms require the UBI device number to agree */
	if (vi->ubi_num != ubi_num)
		return false;

	if (vol_id != -1)
		return vi->vol_id == vol_id;

	/* Got ubi_num, but no vol_id, name must be volume name */
	name_len = strnlen(name, UBI_VOL_NAME_MAX + 1);
	if (name_len < 1 || vi->name_len != name_len)
		return false;

	return strcmp(name, vi->name) == 0;
}
/*
 * Create a ubiblock device for the volume described by @vi if one of the
 * parsed ubiblock parameters selects it.
 *
 * Only the first matching parameter is used. A failure of
 * ubiblock_create() is logged and otherwise ignored.
 */
static void
ubiblock_create_from_param(struct ubi_volume_info *vi)
{
	int i, ret;
	struct ubiblock_param *p;

	/*
	 * Iterate over ubiblock cmdline parameters. If a parameter matches the
	 * newly added volume create the ubiblock device for it.
	 */
	for (i = 0; i < ubiblock_devs; i++) {
		p = &ubiblock_param[i];

		if (!match_volume_desc(vi, p->name, p->ubi_num, p->vol_id))
			continue;

		ret = ubiblock_create(vi);
		if (ret) {
			/*
			 * Report the identifiers of the matched volume: the
			 * parameter's ubi_num/vol_id are -1 when the volume
			 * was selected by path or by name.
			 */
			pr_err(
			       "UBI: block: can't add '%s' volume on ubi%d_%d, err=%d\n",
			       vi->name, vi->ubi_num, vi->vol_id, ret);
		}
		break;
	}
}
static int ubiblock_notify(struct notifier_block *nb,
unsigned long notification_type, void *ns_ptr)
{
......@@ -543,10 +607,7 @@ static int ubiblock_notify(struct notifier_block *nb,
switch (notification_type) {
case UBI_VOLUME_ADDED:
/*
* We want to enforce explicit block device creation for
* volumes, so when a volume is added we do nothing.
*/
ubiblock_create_from_param(&nt->vi);
break;
case UBI_VOLUME_REMOVED:
ubiblock_remove(&nt->vi);
......@@ -572,56 +633,6 @@ static struct notifier_block ubiblock_notifier = {
.notifier_call = ubiblock_notify,
};
/*
 * Open, read-only, the volume selected by a (name, ubi_num, vol_id) triple
 * and return whatever the underlying ubi_open_volume*() helper returns.
 */
static struct ubi_volume_desc * __init
open_volume_desc(const char *name, int ubi_num, int vol_id)
{
	/* No ubi num, name must be a vol device path */
	if (ubi_num == -1)
		return ubi_open_volume_path(name, UBI_READONLY);

	/* No vol_id, must be vol_name */
	if (vol_id == -1)
		return ubi_open_volume_nm(ubi_num, name, UBI_READONLY);

	return ubi_open_volume(ubi_num, vol_id, UBI_READONLY);
}
/*
 * Create one ubiblock device per parsed ubiblock parameter.
 *
 * Each parameter is handled independently: a volume that cannot be opened,
 * or a block device that cannot be created, is logged and skipped so the
 * remaining parameters are still processed.
 */
static void __init ubiblock_create_from_param(void)
{
	struct ubiblock_param *p;
	struct ubiblock_param *end = ubiblock_param + ubiblock_devs;
	struct ubi_volume_desc *desc;
	struct ubi_volume_info vi;
	int err;

	for (p = ubiblock_param; p < end; p++) {
		desc = open_volume_desc(p->name, p->ubi_num, p->vol_id);
		if (IS_ERR(desc)) {
			pr_err(
			       "UBI: block: can't open volume on ubi%d_%d, err=%ld\n",
			       p->ubi_num, p->vol_id, PTR_ERR(desc));
			continue;
		}

		/* Only the volume info is needed; drop the descriptor at once */
		ubi_get_volume_info(desc, &vi);
		ubi_close_volume(desc);

		err = ubiblock_create(&vi);
		if (err)
			pr_err(
			       "UBI: block: can't add '%s' volume on ubi%d_%d, err=%d\n",
			       vi.name, p->ubi_num, p->vol_id, err);
	}
}
static void ubiblock_remove_all(void)
{
struct ubiblock *next;
......@@ -647,18 +658,7 @@ int __init ubiblock_init(void)
if (ubiblock_major < 0)
return ubiblock_major;
/*
* Attach block devices from 'block=' module param.
* Even if one block device in the param list fails to come up,
* still allow the module to load and leave any others up.
*/
ubiblock_create_from_param();
/*
* Block devices are only created upon user requests, so we ignore
* existing volumes.
*/
ret = ubi_register_volume_notifier(&ubiblock_notifier, 1);
ret = ubi_register_volume_notifier(&ubiblock_notifier, 0);
if (ret)
goto err_unreg;
return 0;
......
......@@ -27,6 +27,7 @@
#include <linux/log2.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/major.h>
#include "ubi.h"
......@@ -92,7 +93,7 @@ static struct ubi_device *ubi_devices[UBI_MAX_DEVICES];
/* Serializes UBI devices creations and removals */
DEFINE_MUTEX(ubi_devices_mutex);
/* Protects @ubi_devices and @ubi->ref_count */
/* Protects @ubi_devices, @ubi->ref_count and @ubi->is_dead */
static DEFINE_SPINLOCK(ubi_devices_lock);
/* "Show" method for files in '/<sysfs>/class/ubi/' */
......@@ -260,6 +261,9 @@ struct ubi_device *ubi_get_device(int ubi_num)
spin_lock(&ubi_devices_lock);
ubi = ubi_devices[ubi_num];
if (ubi && ubi->is_dead)
ubi = NULL;
if (ubi) {
ubi_assert(ubi->ref_count >= 0);
ubi->ref_count += 1;
......@@ -297,7 +301,7 @@ struct ubi_device *ubi_get_by_major(int major)
spin_lock(&ubi_devices_lock);
for (i = 0; i < UBI_MAX_DEVICES; i++) {
ubi = ubi_devices[i];
if (ubi && MAJOR(ubi->cdev.dev) == major) {
if (ubi && !ubi->is_dead && MAJOR(ubi->cdev.dev) == major) {
ubi_assert(ubi->ref_count >= 0);
ubi->ref_count += 1;
get_device(&ubi->dev);
......@@ -326,7 +330,7 @@ int ubi_major2num(int major)
for (i = 0; i < UBI_MAX_DEVICES; i++) {
struct ubi_device *ubi = ubi_devices[i];
if (ubi && MAJOR(ubi->cdev.dev) == major) {
if (ubi && !ubi->is_dead && MAJOR(ubi->cdev.dev) == major) {
ubi_num = ubi->ubi_num;
break;
}
......@@ -513,7 +517,7 @@ static void ubi_free_volumes_from(struct ubi_device *ubi, int from)
int i;
for (i = from; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
if (!ubi->volumes[i])
if (!ubi->volumes[i] || ubi->volumes[i]->is_dead)
continue;
ubi_eba_replace_table(ubi->volumes[i], NULL);
ubi_fastmap_destroy_checkmap(ubi->volumes[i]);
......@@ -1098,7 +1102,6 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
return -EINVAL;
spin_lock(&ubi_devices_lock);
put_device(&ubi->dev);
ubi->ref_count -= 1;
if (ubi->ref_count) {
if (!anyway) {
......@@ -1109,6 +1112,13 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
ubi_err(ubi, "%s reference count %d, destroy anyway",
ubi->ubi_name, ubi->ref_count);
}
ubi->is_dead = true;
spin_unlock(&ubi_devices_lock);
ubi_notify_all(ubi, UBI_VOLUME_SHUTDOWN, NULL);
spin_lock(&ubi_devices_lock);
put_device(&ubi->dev);
ubi_devices[ubi_num] = NULL;
spin_unlock(&ubi_devices_lock);
......@@ -1219,43 +1229,43 @@ static struct mtd_info * __init open_mtd_device(const char *mtd_dev)
return mtd;
}
static int __init ubi_init(void)
static void ubi_notify_add(struct mtd_info *mtd)
{
int err, i, k;
struct device_node *np = mtd_get_of_node(mtd);
int err;
/* Ensure that EC and VID headers have correct size */
BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64);
BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64);
if (!of_device_is_compatible(np, "linux,ubi"))
return;
if (mtd_devs > UBI_MAX_DEVICES) {
pr_err("UBI error: too many MTD devices, maximum is %d\n",
UBI_MAX_DEVICES);
return -EINVAL;
}
/*
* we are already holding &mtd_table_mutex, but still need
* to bump refcount
*/
err = __get_mtd_device(mtd);
if (err)
return;
/* Create base sysfs directory and sysfs files */
err = class_register(&ubi_class);
/* called while holding mtd_table_mutex */
mutex_lock_nested(&ubi_devices_mutex, SINGLE_DEPTH_NESTING);
err = ubi_attach_mtd_dev(mtd, UBI_DEV_NUM_AUTO, 0, 0, false, false);
mutex_unlock(&ubi_devices_mutex);
if (err < 0)
return err;
err = misc_register(&ubi_ctrl_cdev);
if (err) {
pr_err("UBI error: cannot register device\n");
goto out;
}
__put_mtd_device(mtd);
}
ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab",
sizeof(struct ubi_wl_entry),
0, 0, NULL);
if (!ubi_wl_entry_slab) {
err = -ENOMEM;
goto out_dev_unreg;
}
/*
 * MTD notifier "remove" callback (installed as .remove in ubi_mtd_notifier).
 * Intentionally empty: nothing is done when an MTD device goes away.
 */
static void ubi_notify_remove(struct mtd_info *mtd)
{
/* do nothing for now */
}
err = ubi_debugfs_init();
if (err)
goto out_slab;
/* MTD notifier wiring ubi_notify_add()/ubi_notify_remove() as add/remove callbacks */
static struct mtd_notifier ubi_mtd_notifier = {
.add = ubi_notify_add,
.remove = ubi_notify_remove,
};
static int __init ubi_init_attach(void)
{
int err, i, k;
/* Attach MTD devices */
for (i = 0; i < mtd_devs; i++) {
......@@ -1304,25 +1314,79 @@ static int __init ubi_init(void)
}
}
return 0;
out_detach:
for (k = 0; k < i; k++)
if (ubi_devices[k]) {
mutex_lock(&ubi_devices_mutex);
ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1);
mutex_unlock(&ubi_devices_mutex);
}
return err;
}
#ifndef CONFIG_MTD_UBI_MODULE
late_initcall(ubi_init_attach);
#endif
static int __init ubi_init(void)
{
int err;
/* Ensure that EC and VID headers have correct size */
BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64);
BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64);
if (mtd_devs > UBI_MAX_DEVICES) {
pr_err("UBI error: too many MTD devices, maximum is %d\n",
UBI_MAX_DEVICES);
return -EINVAL;
}
/* Create base sysfs directory and sysfs files */
err = class_register(&ubi_class);
if (err < 0)
return err;
err = misc_register(&ubi_ctrl_cdev);
if (err) {
pr_err("UBI error: cannot register device\n");
goto out;
}
ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab",
sizeof(struct ubi_wl_entry),
0, 0, NULL);
if (!ubi_wl_entry_slab) {
err = -ENOMEM;
goto out_dev_unreg;
}
err = ubi_debugfs_init();
if (err)
goto out_slab;
err = ubiblock_init();
if (err) {
pr_err("UBI error: block: cannot initialize, error %d\n", err);
/* See comment above re-ubi_is_module(). */
if (ubi_is_module())
goto out_detach;
goto out_slab;
}
register_mtd_user(&ubi_mtd_notifier);
if (ubi_is_module()) {
err = ubi_init_attach();
if (err)
goto out_mtd_notifier;
}
return 0;
out_detach:
for (k = 0; k < i; k++)
if (ubi_devices[k]) {
mutex_lock(&ubi_devices_mutex);
ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1);
mutex_unlock(&ubi_devices_mutex);
}
ubi_debugfs_exit();
out_mtd_notifier:
unregister_mtd_user(&ubi_mtd_notifier);
out_slab:
kmem_cache_destroy(ubi_wl_entry_slab);
out_dev_unreg:
......@@ -1332,13 +1396,15 @@ static int __init ubi_init(void)
pr_err("UBI error: cannot initialize UBI, error %d\n", err);
return err;
}
late_initcall(ubi_init);
device_initcall(ubi_init);
static void __exit ubi_exit(void)
{
int i;
ubiblock_exit();
unregister_mtd_user(&ubi_mtd_notifier);
for (i = 0; i < UBI_MAX_DEVICES; i++)
if (ubi_devices[i]) {
......
......@@ -1456,7 +1456,14 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
}
ubi_assert(vol->eba_tbl->entries[lnum].pnum == from);
/*
 * Take volumes_lock here so that a stale, already replaced eba_tbl is not
 * updated while ubi_resize_volume() is copying the table.
 */
spin_lock(&ubi->volumes_lock);
vol->eba_tbl->entries[lnum].pnum = to;
spin_unlock(&ubi->volumes_lock);
out_unlock_buf:
mutex_unlock(&ubi->buf_mutex);
......
......@@ -85,9 +85,10 @@ size_t ubi_calc_fm_size(struct ubi_device *ubi)
sizeof(struct ubi_fm_scan_pool) +
sizeof(struct ubi_fm_scan_pool) +
(ubi->peb_count * sizeof(struct ubi_fm_ec)) +
(sizeof(struct ubi_fm_eba) +
(ubi->peb_count * sizeof(__be32))) +
sizeof(struct ubi_fm_volhdr) * UBI_MAX_VOLUMES;
((sizeof(struct ubi_fm_eba) +
sizeof(struct ubi_fm_volhdr)) *
(UBI_MAX_VOLUMES + UBI_INT_VOL_COUNT)) +
(ubi->peb_count * sizeof(__be32));
return roundup(size, ubi->leb_size);
}
......
......@@ -152,7 +152,7 @@ struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode)
spin_lock(&ubi->volumes_lock);
vol = ubi->volumes[vol_id];
if (!vol)
if (!vol || vol->is_dead)
goto out_unlock;
err = -EBUSY;
......@@ -279,6 +279,41 @@ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
}
EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
/**
 * ubi_get_num_by_path - resolve a volume device node to UBI device/volume numbers
 * @pathname: path of the volume character device node
 * @ubi_num: where the UBI device number is stored
 * @vol_id: where the UBI volume ID is stored
 *
 * The path is looked up (following symlinks) and must refer to a character
 * device. The UBI device number is derived from the device major via
 * ubi_major2num(), the volume ID is the device minor minus one.
 *
 * Returns 0 on success with @ubi_num and @vol_id set. Returns the error of
 * the path lookup or attribute query if either fails, %-EINVAL if the node is
 * not a character device, and %-ENODEV if the resulting device number or
 * volume ID is negative.
 */
int ubi_get_num_by_path(const char *pathname, int *ubi_num, int *vol_id)
{
	struct kstat stat;
	struct path path;
	int err;

	err = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (err)
		return err;

	/* The path reference is only needed for the attribute query */
	err = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
	path_put(&path);
	if (err)
		return err;

	if (!S_ISCHR(stat.mode))
		return -EINVAL;

	*ubi_num = ubi_major2num(MAJOR(stat.rdev));
	*vol_id = MINOR(stat.rdev) - 1;

	return (*ubi_num >= 0 && *vol_id >= 0) ? 0 : -ENODEV;
}
/**
* ubi_open_volume_path - open UBI volume by its character device node path.
* @pathname: volume character device node path
......@@ -290,32 +325,17 @@ EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
{
int error, ubi_num, vol_id;
struct path path;
struct kstat stat;
dbg_gen("open volume %s, mode %d", pathname, mode);
if (!pathname || !*pathname)
return ERR_PTR(-EINVAL);
error = kern_path(pathname, LOOKUP_FOLLOW, &path);
if (error)
return ERR_PTR(error);
error = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
path_put(&path);
error = ubi_get_num_by_path(pathname, &ubi_num, &vol_id);
if (error)
return ERR_PTR(error);
if (!S_ISCHR(stat.mode))
return ERR_PTR(-EINVAL);
ubi_num = ubi_major2num(MAJOR(stat.rdev));
vol_id = MINOR(stat.rdev) - 1;
if (vol_id >= 0 && ubi_num >= 0)
return ubi_open_volume(ubi_num, vol_id, mode);
return ERR_PTR(-ENODEV);
return ubi_open_volume(ubi_num, vol_id, mode);
}
EXPORT_SYMBOL_GPL(ubi_open_volume_path);
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2023 Daniel Golle <daniel@makrotopia.org>
*/
/* UBI NVMEM provider */
#include "ubi.h"
#include <linux/nvmem-provider.h>
#include <asm/div64.h>
/* List of all NVMEM devices */
static LIST_HEAD(nvmem_devices);
static DEFINE_MUTEX(devices_mutex);
/**
 * struct ubi_nvmem - one NVMEM device backed by a UBI volume
 * @nvmem: NVMEM device handle returned by nvmem_register()
 * @ubi_num: UBI device number of the backing volume
 * @vol_id: volume ID of the backing volume
 * @usable_leb_size: usable LEB size copied from the volume info; used to
 *                   split a byte offset into LEB number and in-LEB offset
 * @list: link in the global nvmem_devices list
 */
struct ubi_nvmem {
struct nvmem_device *nvmem;
int ubi_num;
int vol_id;
int usable_leb_size;
struct list_head list;
};
/*
 * NVMEM read callback: copy @bytes bytes starting at byte offset @from of the
 * UBI volume described by @priv (a struct ubi_nvmem) into @val.
 *
 * The volume is opened read-only for the duration of the call and the request
 * is split into per-LEB reads.
 *
 * Returns 0 on success, the error from ubi_open_volume() or ubi_read() on
 * failure, or -EIO if not all bytes were read.
 */
static int ubi_nvmem_reg_read(void *priv, unsigned int from,
			      void *val, size_t bytes)
{
	struct ubi_nvmem *unv = priv;
	struct ubi_volume_desc *desc;
	size_t chunk, remaining = bytes;
	uint64_t lnum = from;
	uint32_t offs;
	int err = 0;

	desc = ubi_open_volume(unv->ubi_num, unv->vol_id, UBI_READONLY);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	/* do_div() leaves the LEB number in lnum and returns the in-LEB offset */
	offs = do_div(lnum, unv->usable_leb_size);

	/* Only the first LEB is read from a non-zero offset */
	for (; remaining; lnum++, offs = 0) {
		chunk = unv->usable_leb_size - offs;
		if (chunk > remaining)
			chunk = remaining;

		err = ubi_read(desc, lnum, val, offs, chunk);
		if (err)
			break;

		remaining -= chunk;
		val += chunk;
	}

	ubi_close_volume(desc);

	if (err)
		return err;

	return remaining ? -EIO : 0;
}
/*
 * Register a read-only NVMEM device on top of the UBI volume described by
 * @vi, provided the volume's device tree node has an "nvmem-layout" child.
 *
 * On success the new device is appended to nvmem_devices.
 *
 * Returns 0 on success or when the volume has no device tree node or no
 * "nvmem-layout" child, -EINVAL if the volume reports a non-positive usable
 * LEB size or size, -ENOMEM on allocation failure, or the error reported for
 * a failed nvmem_register().
 */
static int ubi_nvmem_add(struct ubi_volume_info *vi)
{
	struct device_node *np = dev_of_node(vi->dev);
	struct device_node *layout_np;
	struct nvmem_config config = {};
	struct ubi_nvmem *unv;
	int ret;

	if (!np)
		return 0;

	/*
	 * The child is looked up only to test for its presence.
	 * of_get_child_by_name() returns the node with its reference count
	 * raised, so drop that reference again right away.
	 */
	layout_np = of_get_child_by_name(np, "nvmem-layout");
	if (!layout_np)
		return 0;
	of_node_put(layout_np);

	if (WARN_ON_ONCE(vi->usable_leb_size <= 0) ||
	    WARN_ON_ONCE(vi->size <= 0))
		return -EINVAL;

	unv = kzalloc(sizeof(struct ubi_nvmem), GFP_KERNEL);
	if (!unv)
		return -ENOMEM;

	config.id = NVMEM_DEVID_NONE;
	config.dev = vi->dev;
	config.name = dev_name(vi->dev);
	config.owner = THIS_MODULE;
	config.priv = unv;
	config.reg_read = ubi_nvmem_reg_read;
	config.size = vi->usable_leb_size * vi->size;
	config.word_size = 1;
	config.stride = 1;
	config.read_only = true;
	config.root_only = true;
	config.ignore_wp = true;
	config.of_node = np;

	/* Fill in the private data before registering the device with it */
	unv->ubi_num = vi->ubi_num;
	unv->vol_id = vi->vol_id;
	unv->usable_leb_size = vi->usable_leb_size;
	unv->nvmem = nvmem_register(&config);
	if (IS_ERR(unv->nvmem)) {
		ret = dev_err_probe(vi->dev, PTR_ERR(unv->nvmem),
				    "Failed to register NVMEM device\n");
		kfree(unv);
		return ret;
	}

	mutex_lock(&devices_mutex);
	list_add_tail(&unv->list, &nvmem_devices);
	mutex_unlock(&devices_mutex);

	return 0;
}
static void ubi_nvmem_remove(struct ubi_volume_info *vi)
{
struct ubi_nvmem *unv_c, *unv = NULL;
mutex_lock(&devices_mutex);
list_for_each_entry(unv_c, &nvmem_devices, list)
if (unv_c->ubi_num == vi->ubi_num && unv_c->vol_id == vi->vol_id) {
unv = unv_c;
break;
}
if (!unv) {
mutex_unlock(&devices_mutex);
return;
}
list_del(&unv->list);
mutex_unlock(&devices_mutex);
nvmem_unregister(unv->nvmem);
kfree(unv);
}
/**
 * nvmem_notify - UBI notification handler.
 * @nb: registered notifier block
 * @l: notification type
 * @ns_ptr: pointer to the &struct ubi_notification object
 *
 * A resized volume has its NVMEM device removed and added again, an added
 * volume gets an NVMEM device, and a volume being shut down has its NVMEM
 * device removed. All other notification types are ignored.
 *
 * Always returns %NOTIFY_OK.
 */
static int nvmem_notify(struct notifier_block *nb, unsigned long l,
			void *ns_ptr)
{
	struct ubi_notification *nt = ns_ptr;

	/* Removal comes first so that a resize re-creates the device */
	if (l == UBI_VOLUME_RESIZED || l == UBI_VOLUME_SHUTDOWN)
		ubi_nvmem_remove(&nt->vi);

	if (l == UBI_VOLUME_RESIZED || l == UBI_VOLUME_ADDED)
		ubi_nvmem_add(&nt->vi);

	return NOTIFY_OK;
}
/* Notifier block that routes UBI volume notifications to nvmem_notify() */
static struct notifier_block nvmem_notifier = {
.notifier_call = nvmem_notify,
};
/*
 * Module init: register nvmem_notifier for UBI volume notifications and
 * return the result of that registration.
 * NOTE(review): the second argument (0) presumably asks for notifications
 * about already existing volumes as well - confirm against
 * ubi_register_volume_notifier().
 */
static int __init ubi_nvmem_init(void)
{
return ubi_register_volume_notifier(&nvmem_notifier, 0);
}
/*
 * Module exit: stop receiving UBI volume notifications, then unregister and
 * free every NVMEM device still on nvmem_devices.
 */
static void __exit ubi_nvmem_exit(void)
{
	struct ubi_nvmem *unv, *tmp;

	/*
	 * Unregister the notifier before draining the list. Otherwise a
	 * notification arriving in between could add a new device that is
	 * never unregistered or freed.
	 */
	ubi_unregister_volume_notifier(&nvmem_notifier);

	mutex_lock(&devices_mutex);
	list_for_each_entry_safe(unv, tmp, &nvmem_devices, list) {
		list_del(&unv->list);
		nvmem_unregister(unv->nvmem);
		kfree(unv);
	}
	mutex_unlock(&devices_mutex);
}
module_init(ubi_nvmem_init);
module_exit(ubi_nvmem_exit);
MODULE_DESCRIPTION("NVMEM layer over UBI volumes");
MODULE_AUTHOR("Daniel Golle");
MODULE_LICENSE("GPL");
......@@ -337,6 +337,7 @@ struct ubi_volume {
int writers;
int exclusive;
int metaonly;
bool is_dead;
int reserved_pebs;
int vol_type;
......@@ -561,6 +562,7 @@ struct ubi_device {
spinlock_t volumes_lock;
int ref_count;
int image_seq;
bool is_dead;
int rsvd_pebs;
int avail_pebs;
......@@ -955,6 +957,7 @@ void ubi_free_internal_volumes(struct ubi_device *ubi);
void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di);
void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
struct ubi_volume_info *vi);
int ubi_get_num_by_path(const char *pathname, int *ubi_num, int *vol_id);
/* scan.c */
int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb,
int pnum, const struct ubi_vid_hdr *vid_hdr);
......
......@@ -59,7 +59,7 @@ static ssize_t vol_attribute_show(struct device *dev,
struct ubi_device *ubi = vol->ubi;
spin_lock(&ubi->volumes_lock);
if (!ubi->volumes[vol->vol_id]) {
if (!ubi->volumes[vol->vol_id] || ubi->volumes[vol->vol_id]->is_dead) {
spin_unlock(&ubi->volumes_lock);
return -ENODEV;
}
......@@ -124,6 +124,31 @@ static void vol_release(struct device *dev)
kfree(vol);
}
/*
 * Find the firmware node describing @vol below the "volumes" child node of
 * the grandparent of the volume's device.
 *
 * A child node matches when every selector it carries agrees with the
 * volume: "volname" must equal the volume name exactly and "volid" must
 * equal the volume ID. A selector that is absent is not checked.
 *
 * Returns the first matching child node, with the reference taken by the
 * child iterator still held, or NULL if there is no "volumes" node or no
 * child matches.
 */
static struct fwnode_handle *find_volume_fwnode(struct ubi_volume *vol)
{
	struct fwnode_handle *fw_vols, *fw_vol, *match = NULL;
	const char *volname;
	u32 volid;

	fw_vols = device_get_named_child_node(vol->dev.parent->parent, "volumes");
	if (!fw_vols)
		return NULL;

	fwnode_for_each_child_node(fw_vols, fw_vol) {
		/*
		 * Compare the lengths as well: strncmp() limited to
		 * vol->name_len alone would accept any "volname" that merely
		 * starts with the volume name.
		 */
		if (!fwnode_property_read_string(fw_vol, "volname", &volname) &&
		    (strlen(volname) != vol->name_len ||
		     strncmp(volname, vol->name, vol->name_len)))
			continue;

		if (!fwnode_property_read_u32(fw_vol, "volid", &volid) &&
		    vol->vol_id != volid)
			continue;

		/* Leaving the loop early keeps the reference on fw_vol */
		match = fw_vol;
		break;
	}

	/* Drop the reference taken by device_get_named_child_node() */
	fwnode_handle_put(fw_vols);

	return match;
}
/**
* ubi_create_volume - create volume.
* @ubi: UBI device description object
......@@ -189,7 +214,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
/* Ensure that the name is unique */
for (i = 0; i < ubi->vtbl_slots; i++)
if (ubi->volumes[i] &&
if (ubi->volumes[i] && !ubi->volumes[i]->is_dead &&
ubi->volumes[i]->name_len == req->name_len &&
!strcmp(ubi->volumes[i]->name, req->name)) {
ubi_err(ubi, "volume \"%s\" exists (ID %d)",
......@@ -223,6 +248,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
vol->name_len = req->name_len;
memcpy(vol->name, req->name, vol->name_len);
vol->ubi = ubi;
device_set_node(&vol->dev, find_volume_fwnode(vol));
/*
* Finish all pending erases because there may be some LEBs belonging
......@@ -352,6 +378,19 @@ int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
err = -EBUSY;
goto out_unlock;
}
/*
* Mark volume as dead at this point to prevent that anyone
* can take a reference to the volume from now on.
* This is necessary as we have to release the spinlock before
* calling ubi_volume_notify.
*/
vol->is_dead = true;
spin_unlock(&ubi->volumes_lock);
ubi_volume_notify(ubi, vol, UBI_VOLUME_SHUTDOWN);
spin_lock(&ubi->volumes_lock);
ubi->volumes[vol_id] = NULL;
spin_unlock(&ubi->volumes_lock);
......@@ -408,6 +447,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
struct ubi_device *ubi = vol->ubi;
struct ubi_vtbl_record vtbl_rec;
struct ubi_eba_table *new_eba_tbl = NULL;
struct ubi_eba_table *old_eba_tbl = NULL;
int vol_id = vol->vol_id;
if (ubi->ro_mode)
......@@ -453,10 +493,13 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
err = -ENOSPC;
goto out_free;
}
ubi->avail_pebs -= pebs;
ubi->rsvd_pebs += pebs;
ubi_eba_copy_table(vol, new_eba_tbl, vol->reserved_pebs);
ubi_eba_replace_table(vol, new_eba_tbl);
old_eba_tbl = vol->eba_tbl;
vol->eba_tbl = new_eba_tbl;
vol->reserved_pebs = reserved_pebs;
spin_unlock(&ubi->volumes_lock);
}
......@@ -471,7 +514,9 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
ubi->avail_pebs -= pebs;
ubi_update_reserved(ubi);
ubi_eba_copy_table(vol, new_eba_tbl, reserved_pebs);
ubi_eba_replace_table(vol, new_eba_tbl);
old_eba_tbl = vol->eba_tbl;
vol->eba_tbl = new_eba_tbl;
vol->reserved_pebs = reserved_pebs;
spin_unlock(&ubi->volumes_lock);
}
......@@ -493,7 +538,6 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
if (err)
goto out_acc;
vol->reserved_pebs = reserved_pebs;
if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
vol->used_ebs = reserved_pebs;
vol->last_eb_bytes = vol->usable_leb_size;
......@@ -501,19 +545,23 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
(long long)vol->used_ebs * vol->usable_leb_size;
}
/* destroy old table */
ubi_eba_destroy_table(old_eba_tbl);
ubi_volume_notify(ubi, vol, UBI_VOLUME_RESIZED);
self_check_volumes(ubi);
return err;
out_acc:
if (pebs > 0) {
spin_lock(&ubi->volumes_lock);
ubi->rsvd_pebs -= pebs;
ubi->avail_pebs += pebs;
spin_unlock(&ubi->volumes_lock);
}
return err;
spin_lock(&ubi->volumes_lock);
vol->reserved_pebs = reserved_pebs - pebs;
ubi->rsvd_pebs -= pebs;
ubi->avail_pebs += pebs;
if (pebs > 0)
ubi_eba_copy_table(vol, old_eba_tbl, vol->reserved_pebs);
else
ubi_eba_copy_table(vol, old_eba_tbl, reserved_pebs);
vol->eba_tbl = old_eba_tbl;
spin_unlock(&ubi->volumes_lock);
out_free:
ubi_eba_destroy_table(new_eba_tbl);
return err;
......@@ -592,6 +640,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
vol->dev.class = &ubi_class;
vol->dev.groups = volume_dev_groups;
dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id);
device_set_node(&vol->dev, find_volume_fwnode(vol));
err = device_register(&vol->dev);
if (err) {
cdev_del(&vol->cdev);
......
......@@ -791,6 +791,12 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai)
* The number of supported volumes is limited by the eraseblock size
* and by the UBI_MAX_VOLUMES constant.
*/
if (ubi->leb_size < UBI_VTBL_RECORD_SIZE) {
ubi_err(ubi, "LEB size too small for a volume record");
return -EINVAL;
}
ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE;
if (ubi->vtbl_slots > UBI_MAX_VOLUMES)
ubi->vtbl_slots = UBI_MAX_VOLUMES;
......
......@@ -1742,17 +1742,22 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
err = dbg_walk_index(c, NULL, add_size, &calc);
if (err) {
ubifs_err(c, "error %d while walking the index", err);
return err;
goto out_err;
}
if (calc != idx_size) {
ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld",
calc, idx_size);
dump_stack();
return -EINVAL;
err = -EINVAL;
goto out_err;
}
return 0;
out_err:
ubifs_destroy_tnc_tree(c);
return err;
}
/**
......
......@@ -1133,6 +1133,8 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir,
dir_ui->ui_size = dir->i_size;
mutex_unlock(&dir_ui->ui_mutex);
out_inode:
/* Free inode->i_link before inode is marked as bad. */
fscrypt_free_inode(inode);
make_bad_inode(inode);
iput(inode);
out_fname:
......
......@@ -96,36 +96,36 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
return -EINVAL;
}
static int do_readpage(struct page *page)
static int do_readpage(struct folio *folio)
{
void *addr;
int err = 0, i;
unsigned int block, beyond;
struct ubifs_data_node *dn;
struct inode *inode = page->mapping->host;
struct ubifs_data_node *dn = NULL;
struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
loff_t i_size = i_size_read(inode);
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
inode->i_ino, page->index, i_size, page->flags);
ubifs_assert(c, !PageChecked(page));
ubifs_assert(c, !PagePrivate(page));
inode->i_ino, folio->index, i_size, folio->flags);
ubifs_assert(c, !folio_test_checked(folio));
ubifs_assert(c, !folio->private);
addr = kmap(page);
addr = kmap_local_folio(folio, 0);
block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
if (block >= beyond) {
/* Reading beyond inode */
SetPageChecked(page);
memset(addr, 0, PAGE_SIZE);
folio_set_checked(folio);
addr = folio_zero_tail(folio, 0, addr);
goto out;
}
dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS);
if (!dn) {
err = -ENOMEM;
goto error;
goto out;
}
i = 0;
......@@ -150,39 +150,35 @@ static int do_readpage(struct page *page)
memset(addr + ilen, 0, dlen - ilen);
}
}
if (++i >= UBIFS_BLOCKS_PER_PAGE)
if (++i >= (UBIFS_BLOCKS_PER_PAGE << folio_order(folio)))
break;
block += 1;
addr += UBIFS_BLOCK_SIZE;
if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) {
kunmap_local(addr - UBIFS_BLOCK_SIZE);
addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE);
}
}
if (err) {
struct ubifs_info *c = inode->i_sb->s_fs_info;
if (err == -ENOENT) {
/* Not found, so it must be a hole */
SetPageChecked(page);
folio_set_checked(folio);
dbg_gen("hole");
goto out_free;
err = 0;
} else {
ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
folio->index, inode->i_ino, err);
}
ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
page->index, inode->i_ino, err);
goto error;
}
out_free:
kfree(dn);
out:
SetPageUptodate(page);
ClearPageError(page);
flush_dcache_page(page);
kunmap(page);
return 0;
error:
kfree(dn);
ClearPageUptodate(page);
SetPageError(page);
flush_dcache_page(page);
kunmap(page);
if (!err)
folio_mark_uptodate(folio);
flush_dcache_folio(folio);
kunmap_local(addr);
return err;
}
......@@ -222,16 +218,16 @@ static int write_begin_slow(struct address_space *mapping,
pgoff_t index = pos >> PAGE_SHIFT;
struct ubifs_budget_req req = { .new_page = 1 };
int err, appending = !!(pos + len > inode->i_size);
struct page *page;
struct folio *folio;
dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
inode->i_ino, pos, len, inode->i_size);
/*
* At the slow path we have to budget before locking the page, because
* budgeting may force write-back, which would wait on locked pages and
* deadlock if we had the page locked. At this point we do not know
* anything about the page, so assume that this is a new page which is
* At the slow path we have to budget before locking the folio, because
* budgeting may force write-back, which would wait on locked folios and
* deadlock if we had the folio locked. At this point we do not know
* anything about the folio, so assume that this is a new folio which is
* written to a hole. This corresponds to largest budget. Later the
* budget will be amended if this is not true.
*/
......@@ -243,45 +239,43 @@ static int write_begin_slow(struct address_space *mapping,
if (unlikely(err))
return err;
page = grab_cache_page_write_begin(mapping, index);
if (unlikely(!page)) {
folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping));
if (IS_ERR(folio)) {
ubifs_release_budget(c, &req);
return -ENOMEM;
return PTR_ERR(folio);
}
if (!PageUptodate(page)) {
if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE)
SetPageChecked(page);
if (!folio_test_uptodate(folio)) {
if (pos == folio_pos(folio) && len >= folio_size(folio))
folio_set_checked(folio);
else {
err = do_readpage(page);
err = do_readpage(folio);
if (err) {
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
ubifs_release_budget(c, &req);
return err;
}
}
SetPageUptodate(page);
ClearPageError(page);
}
if (PagePrivate(page))
if (folio->private)
/*
* The page is dirty, which means it was budgeted twice:
* The folio is dirty, which means it was budgeted twice:
* o first time the budget was allocated by the task which
* made the page dirty and set the PG_private flag;
* made the folio dirty and set the private field;
* o and then we budgeted for it for the second time at the
* very beginning of this function.
*
* So what we have to do is to release the page budget we
* So what we have to do is to release the folio budget we
* allocated.
*/
release_new_page_budget(c);
else if (!PageChecked(page))
else if (!folio_test_checked(folio))
/*
* We are changing a page which already exists on the media.
* This means that changing the page does not make the amount
* We are changing a folio which already exists on the media.
* This means that changing the folio does not make the amount
* of indexing information larger, and this part of the budget
* which we have already acquired may be released.
*/
......@@ -304,14 +298,14 @@ static int write_begin_slow(struct address_space *mapping,
ubifs_release_dirty_inode_budget(c, ui);
}
*pagep = page;
*pagep = &folio->page;
return 0;
}
/**
* allocate_budget - allocate budget for 'ubifs_write_begin()'.
* @c: UBIFS file-system description object
* @page: page to allocate budget for
* @folio: folio to allocate budget for
* @ui: UBIFS inode object the page belongs to
* @appending: non-zero if the page is appended
*
......@@ -322,15 +316,15 @@ static int write_begin_slow(struct address_space *mapping,
*
* Returns: %0 in case of success and %-ENOSPC in case of failure.
*/
static int allocate_budget(struct ubifs_info *c, struct page *page,
static int allocate_budget(struct ubifs_info *c, struct folio *folio,
struct ubifs_inode *ui, int appending)
{
struct ubifs_budget_req req = { .fast = 1 };
if (PagePrivate(page)) {
if (folio->private) {
if (!appending)
/*
* The page is dirty and we are not appending, which
* The folio is dirty and we are not appending, which
* means no budget is needed at all.
*/
return 0;
......@@ -354,11 +348,11 @@ static int allocate_budget(struct ubifs_info *c, struct page *page,
*/
req.dirtied_ino = 1;
} else {
if (PageChecked(page))
if (folio_test_checked(folio))
/*
* The page corresponds to a hole and does not
* exist on the media. So changing it makes
* make the amount of indexing information
* the amount of indexing information
* larger, and we have to budget for a new
* page.
*/
......@@ -428,7 +422,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = pos >> PAGE_SHIFT;
int err, appending = !!(pos + len > inode->i_size);
int skipped_read = 0;
struct page *page;
struct folio *folio;
ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
ubifs_assert(c, !c->ro_media && !c->ro_mount);
......@@ -437,13 +431,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
return -EROFS;
/* Try out the fast-path part first */
page = grab_cache_page_write_begin(mapping, index);
if (unlikely(!page))
return -ENOMEM;
folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
if (!PageUptodate(page)) {
if (!folio_test_uptodate(folio)) {
/* The page is not loaded from the flash */
if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
if (pos == folio_pos(folio) && len >= folio_size(folio)) {
/*
* We change whole page so no need to load it. But we
* do not know whether this page exists on the media or
......@@ -453,32 +448,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* media. Thus, we are setting the @PG_checked flag
* here.
*/
SetPageChecked(page);
folio_set_checked(folio);
skipped_read = 1;
} else {
err = do_readpage(page);
err = do_readpage(folio);
if (err) {
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
return err;
}
}
SetPageUptodate(page);
ClearPageError(page);
}
err = allocate_budget(c, page, ui, appending);
err = allocate_budget(c, folio, ui, appending);
if (unlikely(err)) {
ubifs_assert(c, err == -ENOSPC);
/*
* If we skipped reading the page because we were going to
* write all of it, then it is not up to date.
*/
if (skipped_read) {
ClearPageChecked(page);
ClearPageUptodate(page);
}
if (skipped_read)
folio_clear_checked(folio);
/*
* Budgeting failed which means it would have to force
* write-back but didn't, because we set the @fast flag in the
......@@ -490,8 +480,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
mutex_unlock(&ui->ui_mutex);
}
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
return write_begin_slow(mapping, pos, len, pagep);
}
......@@ -502,22 +492,21 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* with @ui->ui_mutex locked if we are appending pages, and unlocked
* otherwise. This is an optimization (slightly hacky though).
*/
*pagep = page;
*pagep = &folio->page;
return 0;
}
/**
* cancel_budget - cancel budget.
* @c: UBIFS file-system description object
* @page: page to cancel budget for
* @folio: folio to cancel budget for
* @ui: UBIFS inode object the page belongs to
* @appending: non-zero if the page is appended
*
* This is a helper function for a page write operation. It unlocks the
* @ui->ui_mutex in case of appending.
*/
static void cancel_budget(struct ubifs_info *c, struct page *page,
static void cancel_budget(struct ubifs_info *c, struct folio *folio,
struct ubifs_inode *ui, int appending)
{
if (appending) {
......@@ -525,8 +514,8 @@ static void cancel_budget(struct ubifs_info *c, struct page *page,
ubifs_release_dirty_inode_budget(c, ui);
mutex_unlock(&ui->ui_mutex);
}
if (!PagePrivate(page)) {
if (PageChecked(page))
if (!folio->private) {
if (folio_test_checked(folio))
release_new_page_budget(c);
else
release_existing_page_budget(c);
......@@ -537,6 +526,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct folio *folio = page_folio(page);
struct inode *inode = mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_info *c = inode->i_sb->s_fs_info;
......@@ -544,44 +534,47 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
int appending = !!(end_pos > inode->i_size);
dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
inode->i_ino, pos, page->index, len, copied, inode->i_size);
inode->i_ino, pos, folio->index, len, copied, inode->i_size);
if (unlikely(copied < len && len == PAGE_SIZE)) {
if (unlikely(copied < len && !folio_test_uptodate(folio))) {
/*
* VFS copied less data to the page that it intended and
* VFS copied less data to the folio than it intended and
* declared in its '->write_begin()' call via the @len
* argument. If the page was not up-to-date, and @len was
* @PAGE_SIZE, the 'ubifs_write_begin()' function did
* argument. If the folio was not up-to-date,
* the 'ubifs_write_begin()' function did
* not load it from the media (for optimization reasons). This
* means that part of the page contains garbage. So read the
* page now.
* means that part of the folio contains garbage. So read the
* folio now.
*/
dbg_gen("copied %d instead of %d, read page and repeat",
copied, len);
cancel_budget(c, page, ui, appending);
ClearPageChecked(page);
cancel_budget(c, folio, ui, appending);
folio_clear_checked(folio);
/*
* Return 0 to force VFS to repeat the whole operation, or the
* error code if 'do_readpage()' fails.
*/
copied = do_readpage(page);
copied = do_readpage(folio);
goto out;
}
if (!PagePrivate(page)) {
attach_page_private(page, (void *)1);
if (len == folio_size(folio))
folio_mark_uptodate(folio);
if (!folio->private) {
folio_attach_private(folio, (void *)1);
atomic_long_inc(&c->dirty_pg_cnt);
__set_page_dirty_nobuffers(page);
filemap_dirty_folio(mapping, folio);
}
if (appending) {
i_size_write(inode, end_pos);
ui->ui_size = end_pos;
/*
* Note, we do not set @I_DIRTY_PAGES (which means that the
* inode has dirty pages), this has been done in
* '__set_page_dirty_nobuffers()'.
* We do not set @I_DIRTY_PAGES (which means that
* the inode has dirty pages), this was done in
* filemap_dirty_folio().
*/
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
......@@ -589,43 +582,43 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
}
out:
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
return copied;
}
/**
* populate_page - copy data nodes into a page for bulk-read.
* @c: UBIFS file-system description object
* @page: page
* @folio: folio
* @bu: bulk-read information
* @n: next zbranch slot
*
* Returns: %0 on success and a negative error code on failure.
*/
static int populate_page(struct ubifs_info *c, struct page *page,
static int populate_page(struct ubifs_info *c, struct folio *folio,
struct bu_info *bu, int *n)
{
int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
struct inode *inode = page->mapping->host;
struct inode *inode = folio->mapping->host;
loff_t i_size = i_size_read(inode);
unsigned int page_block;
void *addr, *zaddr;
pgoff_t end_index;
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
inode->i_ino, page->index, i_size, page->flags);
inode->i_ino, folio->index, i_size, folio->flags);
addr = zaddr = kmap(page);
addr = zaddr = kmap_local_folio(folio, 0);
end_index = (i_size - 1) >> PAGE_SHIFT;
if (!i_size || page->index > end_index) {
if (!i_size || folio->index > end_index) {
hole = 1;
memset(addr, 0, PAGE_SIZE);
addr = folio_zero_tail(folio, 0, addr);
goto out_hole;
}
page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
page_block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
while (1) {
int err, len, out_len, dlen;
......@@ -674,9 +667,13 @@ static int populate_page(struct ubifs_info *c, struct page *page,
break;
addr += UBIFS_BLOCK_SIZE;
page_block += 1;
if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) {
kunmap_local(addr - UBIFS_BLOCK_SIZE);
addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE);
}
}
if (end_index == page->index) {
if (end_index == folio->index) {
int len = i_size & (PAGE_SIZE - 1);
if (len && len < read)
......@@ -685,22 +682,19 @@ static int populate_page(struct ubifs_info *c, struct page *page,
out_hole:
if (hole) {
SetPageChecked(page);
folio_set_checked(folio);
dbg_gen("hole");
}
SetPageUptodate(page);
ClearPageError(page);
flush_dcache_page(page);
kunmap(page);
folio_mark_uptodate(folio);
flush_dcache_folio(folio);
kunmap_local(addr);
*n = nn;
return 0;
out_err:
ClearPageUptodate(page);
SetPageError(page);
flush_dcache_page(page);
kunmap(page);
flush_dcache_folio(folio);
kunmap_local(addr);
ubifs_err(c, "bad data node (block %u, inode %lu)",
page_block, inode->i_ino);
return -EINVAL;
......@@ -710,15 +704,15 @@ static int populate_page(struct ubifs_info *c, struct page *page,
* ubifs_do_bulk_read - do bulk-read.
* @c: UBIFS file-system description object
* @bu: bulk-read information
* @page1: first page to read
* @folio1: first folio to read
*
* Returns: %1 if the bulk-read is done, otherwise %0 is returned.
*/
static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
struct page *page1)
struct folio *folio1)
{
pgoff_t offset = page1->index, end_index;
struct address_space *mapping = page1->mapping;
pgoff_t offset = folio1->index, end_index;
struct address_space *mapping = folio1->mapping;
struct inode *inode = mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
int err, page_idx, page_cnt, ret = 0, n = 0;
......@@ -768,11 +762,11 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
goto out_warn;
}
err = populate_page(c, page1, bu, &n);
err = populate_page(c, folio1, bu, &n);
if (err)
goto out_warn;
unlock_page(page1);
folio_unlock(folio1);
ret = 1;
isize = i_size_read(inode);
......@@ -782,19 +776,19 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
for (page_idx = 1; page_idx < page_cnt; page_idx++) {
pgoff_t page_offset = offset + page_idx;
struct page *page;
struct folio *folio;
if (page_offset > end_index)
break;
page = pagecache_get_page(mapping, page_offset,
folio = __filemap_get_folio(mapping, page_offset,
FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT,
ra_gfp_mask);
if (!page)
if (IS_ERR(folio))
break;
if (!PageUptodate(page))
err = populate_page(c, page, bu, &n);
unlock_page(page);
put_page(page);
if (!folio_test_uptodate(folio))
err = populate_page(c, folio, bu, &n);
folio_unlock(folio);
folio_put(folio);
if (err)
break;
}
......@@ -817,7 +811,7 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
/**
* ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
* @page: page from which to start bulk-read.
* @folio: folio from which to start bulk-read.
*
* Some flash media are capable of reading sequentially at faster rates. UBIFS
* bulk-read facility is designed to take advantage of that, by reading in one
......@@ -826,12 +820,12 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
*
* Returns: %1 if a bulk-read is done and %0 otherwise.
*/
static int ubifs_bulk_read(struct page *page)
static int ubifs_bulk_read(struct folio *folio)
{
struct inode *inode = page->mapping->host;
struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
pgoff_t index = page->index, last_page_read = ui->last_page_read;
pgoff_t index = folio->index, last_page_read = ui->last_page_read;
struct bu_info *bu;
int err = 0, allocated = 0;
......@@ -879,8 +873,8 @@ static int ubifs_bulk_read(struct page *page)
bu->buf_len = c->max_bu_buf_len;
data_key_init(c, &bu->key, inode->i_ino,
page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
err = ubifs_do_bulk_read(c, bu, page);
folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
err = ubifs_do_bulk_read(c, bu, folio);
if (!allocated)
mutex_unlock(&c->bu_mutex);
......@@ -894,69 +888,71 @@ static int ubifs_bulk_read(struct page *page)
static int ubifs_read_folio(struct file *file, struct folio *folio)
{
struct page *page = &folio->page;
if (ubifs_bulk_read(page))
if (ubifs_bulk_read(folio))
return 0;
do_readpage(page);
do_readpage(folio);
folio_unlock(folio);
return 0;
}
static int do_writepage(struct page *page, int len)
static int do_writepage(struct folio *folio, size_t len)
{
int err = 0, i, blen;
int err = 0, blen;
unsigned int block;
void *addr;
size_t offset = 0;
union ubifs_key key;
struct inode *inode = page->mapping->host;
struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
#ifdef UBIFS_DEBUG
struct ubifs_inode *ui = ubifs_inode(inode);
spin_lock(&ui->ui_lock);
ubifs_assert(c, page->index <= ui->synced_i_size >> PAGE_SHIFT);
ubifs_assert(c, folio->index <= ui->synced_i_size >> PAGE_SHIFT);
spin_unlock(&ui->ui_lock);
#endif
/* Update radix tree tags */
set_page_writeback(page);
folio_start_writeback(folio);
addr = kmap(page);
block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
i = 0;
while (len) {
blen = min_t(int, len, UBIFS_BLOCK_SIZE);
addr = kmap_local_folio(folio, offset);
block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
for (;;) {
blen = min_t(size_t, len, UBIFS_BLOCK_SIZE);
data_key_init(c, &key, inode->i_ino, block);
err = ubifs_jnl_write_data(c, inode, &key, addr, blen);
if (err)
break;
if (++i >= UBIFS_BLOCKS_PER_PAGE)
len -= blen;
if (!len)
break;
block += 1;
addr += blen;
len -= blen;
if (folio_test_highmem(folio) && !offset_in_page(addr)) {
kunmap_local(addr - blen);
offset += PAGE_SIZE;
addr = kmap_local_folio(folio, offset);
}
}
kunmap_local(addr);
if (err) {
SetPageError(page);
ubifs_err(c, "cannot write page %lu of inode %lu, error %d",
page->index, inode->i_ino, err);
mapping_set_error(folio->mapping, err);
ubifs_err(c, "cannot write folio %lu of inode %lu, error %d",
folio->index, inode->i_ino, err);
ubifs_ro_mode(c, err);
}
ubifs_assert(c, PagePrivate(page));
if (PageChecked(page))
ubifs_assert(c, folio->private != NULL);
if (folio_test_checked(folio))
release_new_page_budget(c);
else
release_existing_page_budget(c);
atomic_long_dec(&c->dirty_pg_cnt);
detach_page_private(page);
ClearPageChecked(page);
folio_detach_private(folio);
folio_clear_checked(folio);
kunmap(page);
unlock_page(page);
end_page_writeback(page);
folio_unlock(folio);
folio_end_writeback(folio);
return err;
}
......@@ -1006,22 +1002,21 @@ static int do_writepage(struct page *page, int len)
* on the page lock and it would not write the truncated inode node to the
* journal before we have finished.
*/
static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc,
void *data)
{
struct inode *inode = page->mapping->host;
struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
loff_t i_size = i_size_read(inode), synced_i_size;
pgoff_t end_index = i_size >> PAGE_SHIFT;
int err, len = i_size & (PAGE_SIZE - 1);
void *kaddr;
int err, len = folio_size(folio);
dbg_gen("ino %lu, pg %lu, pg flags %#lx",
inode->i_ino, page->index, page->flags);
ubifs_assert(c, PagePrivate(page));
inode->i_ino, folio->index, folio->flags);
ubifs_assert(c, folio->private != NULL);
/* Is the page fully outside @i_size? (truncate in progress) */
if (page->index > end_index || (page->index == end_index && !len)) {
/* Is the folio fully outside @i_size? (truncate in progress) */
if (folio_pos(folio) >= i_size) {
err = 0;
goto out_unlock;
}
......@@ -1030,9 +1025,9 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
synced_i_size = ui->synced_i_size;
spin_unlock(&ui->ui_lock);
/* Is the page fully inside @i_size? */
if (page->index < end_index) {
if (page->index >= synced_i_size >> PAGE_SHIFT) {
/* Is the folio fully inside i_size? */
if (folio_pos(folio) + len <= i_size) {
if (folio_pos(folio) >= synced_i_size) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
goto out_redirty;
......@@ -1045,20 +1040,18 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
* with this.
*/
}
return do_writepage(page, PAGE_SIZE);
return do_writepage(folio, len);
}
/*
* The page straddles @i_size. It must be zeroed out on each and every
* The folio straddles @i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
kaddr = kmap_atomic(page);
memset(kaddr + len, 0, PAGE_SIZE - len);
flush_dcache_page(page);
kunmap_atomic(kaddr);
len = i_size - folio_pos(folio);
folio_zero_segment(folio, len, folio_size(folio));
if (i_size > synced_i_size) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
......@@ -1066,19 +1059,25 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
goto out_redirty;
}
return do_writepage(page, len);
return do_writepage(folio, len);
out_redirty:
/*
* redirty_page_for_writepage() won't call ubifs_dirty_inode() because
* folio_redirty_for_writepage() won't call ubifs_dirty_inode() because
* it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so
* there is no need to do space budget for dirty inode.
*/
redirty_page_for_writepage(wbc, page);
folio_redirty_for_writepage(wbc, folio);
out_unlock:
unlock_page(page);
folio_unlock(folio);
return err;
}
/*
 * ->writepages implementation: hands ubifs_writepage() to
 * write_cache_pages() as the per-folio callback, with no callback data,
 * and returns whatever write_cache_pages() returns.
 */
static int ubifs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	int err;

	err = write_cache_pages(mapping, wbc, ubifs_writepage, NULL);
	return err;
}
/**
* do_attr_changes - change inode attributes.
* @inode: inode to change attributes for
......@@ -1155,11 +1154,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
if (offset) {
pgoff_t index = new_size >> PAGE_SHIFT;
struct page *page;
struct folio *folio;
page = find_lock_page(inode->i_mapping, index);
if (page) {
if (PageDirty(page)) {
folio = filemap_lock_folio(inode->i_mapping, index);
if (!IS_ERR(folio)) {
if (folio_test_dirty(folio)) {
/*
* 'ubifs_jnl_truncate()' will try to truncate
* the last data node, but it contains
......@@ -1168,14 +1167,14 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
* 'ubifs_jnl_truncate()' will see an already
* truncated (and up to date) data node.
*/
ubifs_assert(c, PagePrivate(page));
ubifs_assert(c, folio->private != NULL);
clear_page_dirty_for_io(page);
folio_clear_dirty_for_io(folio);
if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
offset = new_size &
(PAGE_SIZE - 1);
err = do_writepage(page, offset);
put_page(page);
offset = offset_in_folio(folio,
new_size);
err = do_writepage(folio, offset);
folio_put(folio);
if (err)
goto out_budg;
/*
......@@ -1188,8 +1187,8 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
* to 'ubifs_jnl_truncate()' to save it from
* having to read it.
*/
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
}
}
}
......@@ -1512,14 +1511,14 @@ static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags)
*/
static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct timespec64 now = current_time(inode);
struct ubifs_budget_req req = { .new_page = 1 };
int err, update_time;
dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, folio->index,
i_size_read(inode));
ubifs_assert(c, !c->ro_media && !c->ro_mount);
......@@ -1527,17 +1526,17 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_SIGBUS; /* -EROFS */
/*
* We have not locked @page so far so we may budget for changing the
* page. Note, we cannot do this after we locked the page, because
* We have not locked @folio so far so we may budget for changing the
* folio. Note, we cannot do this after we locked the folio, because
* budgeting may cause write-back which would cause deadlock.
*
* At the moment we do not know whether the page is dirty or not, so we
* assume that it is not and budget for a new page. We could look at
* At the moment we do not know whether the folio is dirty or not, so we
* assume that it is not and budget for a new folio. We could look at
* the @PG_private flag and figure this out, but we may race with write
* back and the page state may change by the time we lock it, so this
* back and the folio state may change by the time we lock it, so this
* would need additional care. We do not bother with this at the
* moment, although it might be good idea to do. Instead, we allocate
* budget for a new page and amend it later on if the page was in fact
* budget for a new folio and amend it later on if the folio was in fact
* dirty.
*
* The budgeting-related logic of this function is similar to what we
......@@ -1560,21 +1559,21 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode))) {
/* Page got truncated out from underneath us */
folio_lock(folio);
if (unlikely(folio->mapping != inode->i_mapping ||
folio_pos(folio) >= i_size_read(inode))) {
/* Folio got truncated out from underneath us */
goto sigbus;
}
if (PagePrivate(page))
if (folio->private)
release_new_page_budget(c);
else {
if (!PageChecked(page))
if (!folio_test_checked(folio))
ubifs_convert_page_budget(c);
attach_page_private(page, (void *)1);
folio_attach_private(folio, (void *)1);
atomic_long_inc(&c->dirty_pg_cnt);
__set_page_dirty_nobuffers(page);
filemap_dirty_folio(folio->mapping, folio);
}
if (update_time) {
......@@ -1590,11 +1589,11 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
ubifs_release_dirty_inode_budget(c, ui);
}
wait_for_stable_page(page);
folio_wait_stable(folio);
return VM_FAULT_LOCKED;
sigbus:
unlock_page(page);
folio_unlock(folio);
ubifs_release_budget(c, &req);
return VM_FAULT_SIGBUS;
}
......@@ -1648,7 +1647,7 @@ static int ubifs_symlink_getattr(struct mnt_idmap *idmap,
const struct address_space_operations ubifs_file_address_operations = {
.read_folio = ubifs_read_folio,
.writepage = ubifs_writepage,
.writepages = ubifs_writepages,
.write_begin = ubifs_write_begin,
.write_end = ubifs_write_end,
.invalidate_folio = ubifs_invalidate_folio,
......
......@@ -82,8 +82,9 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops)
*/
static int scan_for_dirty_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
struct scan_data *data)
void *arg)
{
struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
......@@ -166,8 +167,7 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
data.pick_free = pick_free;
data.lnum = -1;
data.exclude_index = exclude_index;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
(ubifs_lpt_scan_callback)scan_for_dirty_cb,
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_dirty_cb,
&data);
if (err)
return ERR_PTR(err);
......@@ -349,8 +349,9 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
*/
static int scan_for_free_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
struct scan_data *data)
void *arg)
{
struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
......@@ -446,7 +447,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
data.pick_free = pick_free;
data.lnum = -1;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
(ubifs_lpt_scan_callback)scan_for_free_cb,
scan_for_free_cb,
&data);
if (err)
return ERR_PTR(err);
......@@ -589,8 +590,9 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
*/
static int scan_for_idx_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
struct scan_data *data)
void *arg)
{
struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
......@@ -625,8 +627,7 @@ static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
int err;
data.lnum = -1;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
(ubifs_lpt_scan_callback)scan_for_idx_cb,
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_idx_cb,
&data);
if (err)
return ERR_PTR(err);
......@@ -726,11 +727,10 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
return err;
}
static int cmp_dirty_idx(const struct ubifs_lprops **a,
const struct ubifs_lprops **b)
static int cmp_dirty_idx(const void *a, const void *b)
{
const struct ubifs_lprops *lpa = *a;
const struct ubifs_lprops *lpb = *b;
const struct ubifs_lprops *lpa = *(const struct ubifs_lprops **)a;
const struct ubifs_lprops *lpb = *(const struct ubifs_lprops **)b;
return lpa->dirty + lpa->free - lpb->dirty - lpb->free;
}
......@@ -754,7 +754,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
sizeof(void *) * c->dirty_idx.cnt);
/* Sort it so that the dirtiest is now at the end */
sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *),
(int (*)(const void *, const void *))cmp_dirty_idx, NULL);
cmp_dirty_idx, NULL);
dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt);
if (c->dirty_idx.cnt)
dbg_find("dirtiest index LEB is %d with dirty %d and free %d",
......@@ -782,8 +782,9 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
*/
static int scan_dirty_idx_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
struct scan_data *data)
void *arg)
{
struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
......@@ -842,8 +843,7 @@ static int find_dirty_idx_leb(struct ubifs_info *c)
if (c->pnodes_have >= c->pnode_cnt)
/* All pnodes are in memory, so skip scan */
return -ENOSPC;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
(ubifs_lpt_scan_callback)scan_dirty_idx_cb,
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_dirty_idx_cb,
&data);
if (err)
return err;
......
......@@ -292,6 +292,96 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
return err;
}
/**
* __queue_and_wait - queue the current task and sleep until it is woken up.
* @c: UBIFS file-system description object
*
* This function adds the current task to the tail of @c->reserve_space_wq as
* an exclusive waiter and sleeps until the task is woken up. It must be called
* with the @c->reserve_space_wq lock held; the lock is released here before
* sleeping and is not re-acquired by this function.
*/
static void __queue_and_wait(struct ubifs_info *c)
{
DEFINE_WAIT(wait);
/* Enqueue at the tail, marked exclusive, while the caller's lock is held. */
__add_wait_queue_entry_tail_exclusive(&c->reserve_space_wq, &wait);
/*
* NOTE(review): the task state is set before the lock is dropped,
* presumably so that a wake-up issued right after the unlock is not
* lost - confirm against the wait queue API documentation.
*/
set_current_state(TASK_UNINTERRUPTIBLE);
/* Drop the lock taken by the caller before going to sleep. */
spin_unlock(&c->reserve_space_wq.lock);
schedule();
/* Woken up: clean up the on-stack wait entry. */
finish_wait(&c->reserve_space_wq, &wait);
}
/**
 * wait_for_reservation - sleep in the reservation queue if queuing is active.
 * @c: UBIFS file-system description object
 *
 * If queuing has been started (@c->need_wait_space is not %0), the current
 * task is handed to __queue_and_wait() and sleeps until it is woken up.
 *
 * Returns: %true if the current task was added to the queue, %false if
 * queuing was not started and the task did not wait.
 */
static bool wait_for_reservation(struct ubifs_info *c)
{
	/* Lockless quick check: queuing has not been started. */
	if (likely(!atomic_read(&c->need_wait_space)))
		return false;

	spin_lock(&c->reserve_space_wq.lock);
	/* Check again under the wait queue lock before queuing. */
	if (atomic_read(&c->need_wait_space) != 0) {
		/* __queue_and_wait() releases the wait queue lock itself. */
		__queue_and_wait(c);
		return true;
	}
	/* Queuing is not started, so do not queue the current task. */
	spin_unlock(&c->reserve_space_wq.lock);

	return false;
}
/**
 * wake_up_reservation - wake up the first queued task or stop queuing.
 * @c: UBIFS file-system description object
 *
 * If @c->reserve_space_wq has waiters, a wake-up is issued on it; otherwise
 * queuing is stopped by resetting @c->need_wait_space to %0.
 */
static void wake_up_reservation(struct ubifs_info *c)
{
	spin_lock(&c->reserve_space_wq.lock);
	if (!waitqueue_active(&c->reserve_space_wq)) {
		/*
		 * Reset @c->need_wait_space while holding the wait queue
		 * lock. wait_for_reservation() re-checks the flag under the
		 * same lock, so the flag cannot be set to 0 after a new task
		 * has already been queued.
		 */
		atomic_set(&c->need_wait_space, 0);
	} else {
		wake_up_locked(&c->reserve_space_wq);
	}
	spin_unlock(&c->reserve_space_wq.lock);
}
/**
 * add_or_start_queue - add current task in queue or start queuing.
 * @c: UBIFS file-system description object
 *
 * If queuing has not been started yet, this function starts it by switching
 * @c->need_wait_space from %0 to %1 and returns immediately, so the current
 * task can go on. Otherwise the current task is added to the wait queue and
 * sleeps until it is woken up.
 */
static void add_or_start_queue(struct ubifs_info *c)
{
	spin_lock(&c->reserve_space_wq.lock);
	if (atomic_cmpxchg(&c->need_wait_space, 0, 1) == 0) {
		/* This task started queuing, it can go on directly. */
		spin_unlock(&c->reserve_space_wq.lock);
		return;
	}

	/*
	 * At least two tasks have retried more than 32 times at this point.
	 * The first one has already started queuing, so just queue the
	 * remaining tasks. __queue_and_wait() releases the wait queue lock
	 * before sleeping, hence there is no unlock on this path.
	 */
	__queue_and_wait(c);
}
/**
* make_reservation - reserve journal space.
* @c: UBIFS file-system description object
......@@ -311,33 +401,27 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
static int make_reservation(struct ubifs_info *c, int jhead, int len)
{
int err, cmt_retries = 0, nospc_retries = 0;
bool blocked = wait_for_reservation(c);
again:
down_read(&c->commit_sem);
err = reserve_space(c, jhead, len);
if (!err)
if (!err) {
/* c->commit_sem will get released via finish_reservation(). */
return 0;
goto out_wake_up;
}
up_read(&c->commit_sem);
if (err == -ENOSPC) {
/*
* GC could not make any progress. We should try to commit
* once because it could make some dirty space and GC would
* make progress, so make the error -EAGAIN so that the below
* because it could make some dirty space and GC would make
* progress, so make the error -EAGAIN so that the below
* will commit and re-try.
*/
if (nospc_retries++ < 2) {
dbg_jnl("no space, retry");
err = -EAGAIN;
}
/*
* This means that the budgeting is incorrect. We always have
* to be able to write to the media, because all operations are
* budgeted. Deletions are not budgeted, though, but we reserve
* an extra LEB for them.
*/
nospc_retries++;
dbg_jnl("no space, retry");
err = -EAGAIN;
}
if (err != -EAGAIN)
......@@ -349,15 +433,37 @@ static int make_reservation(struct ubifs_info *c, int jhead, int len)
*/
if (cmt_retries > 128) {
/*
* This should not happen unless the journal size limitations
* are too tough.
* This should not happen unless:
* 1. The journal size limitations are too tough.
* 2. The budgeting is incorrect. We always have to be able to
* write to the media, because all operations are budgeted.
* Deletions are not budgeted, though, but we reserve an
* extra LEB for them.
*/
ubifs_err(c, "stuck in space allocation");
ubifs_err(c, "stuck in space allocation, nospc_retries %d",
nospc_retries);
err = -ENOSPC;
goto out;
} else if (cmt_retries > 32)
ubifs_warn(c, "too many space allocation re-tries (%d)",
cmt_retries);
} else if (cmt_retries > 32) {
/*
* This is very unlikely to happen, unless many tasks are making
* reservations concurrently and some task has retried gc + commit
* many times, while the space made available during this period
* was grabbed by other tasks.
* But if it happens, start queuing up all tasks that want to make a
* space reservation, so that only one task makes a space reservation
* at any time; it can always succeed under the premise of correct
* budgeting.
*/
ubifs_warn(c, "too many space allocation cmt_retries (%d) "
"nospc_retries (%d), start queuing tasks",
cmt_retries, nospc_retries);
if (!blocked) {
blocked = true;
add_or_start_queue(c);
}
}
dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
cmt_retries);
......@@ -365,7 +471,7 @@ static int make_reservation(struct ubifs_info *c, int jhead, int len)
err = ubifs_run_commit(c);
if (err)
return err;
goto out_wake_up;
goto again;
out:
......@@ -380,6 +486,27 @@ static int make_reservation(struct ubifs_info *c, int jhead, int len)
cmt_retries = dbg_check_lprops(c);
up_write(&c->commit_sem);
}
out_wake_up:
if (blocked) {
/*
* Only tasks that have ever started queuing or ever been queued
* can wake up other queued tasks, which can make sure that
* there is only one task waked up to make space reservation.
* For example:
* task A task B task C
* make_reservation make_reservation
* reserve_space // 0
* wake_up_reservation
* atomic_cmpxchg // 0, start queuing
* reserve_space
* wait_for_reservation
* __queue_and_wait
* add_wait_queue
* if (blocked) // false
* // So that task C won't be waked up to race with task B
*/
wake_up_reservation(c);
}
return err;
}
......
......@@ -1014,8 +1014,9 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
*/
static int scan_check_cb(struct ubifs_info *c,
const struct ubifs_lprops *lp, int in_tree,
struct ubifs_lp_stats *lst)
void *arg)
{
struct ubifs_lp_stats *lst = arg;
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
......@@ -1269,8 +1270,7 @@ int dbg_check_lprops(struct ubifs_info *c)
memset(&lst, 0, sizeof(struct ubifs_lp_stats));
err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
(ubifs_lpt_scan_callback)scan_check_cb,
&lst);
scan_check_cb, &lst);
if (err && err != -ENOSPC)
goto out;
......
......@@ -1646,7 +1646,6 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
len -= node_len;
}
err = 0;
out:
vfree(buf);
return err;
......
......@@ -2151,6 +2151,8 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
mutex_init(&c->bu_mutex);
mutex_init(&c->write_reserve_mutex);
init_waitqueue_head(&c->cmt_wq);
init_waitqueue_head(&c->reserve_space_wq);
atomic_set(&c->need_wait_space, 0);
c->buds = RB_ROOT;
c->old_idx = RB_ROOT;
c->size_tree = RB_ROOT;
......
......@@ -3116,14 +3116,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
void ubifs_tnc_close(struct ubifs_info *c)
{
tnc_destroy_cnext(c);
if (c->zroot.znode) {
long n, freed;
n = atomic_long_read(&c->clean_zn_cnt);
freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode);
ubifs_assert(c, freed == n);
atomic_long_sub(n, &ubifs_clean_zn_cnt);
}
ubifs_destroy_tnc_tree(c);
kfree(c->gap_lebs);
kfree(c->ilebs);
destroy_old_idx(c);
......
......@@ -250,6 +250,28 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
}
}
/**
 * ubifs_destroy_tnc_tree - tear down the entire TNC tree.
 * @c: UBIFS file-system description object
 *
 * Destroys every znode reachable from @c->zroot, subtracts this file-system's
 * clean znode count from the global clean znode counter, and resets
 * @c->zroot.znode to %NULL. Does nothing if there is no root znode.
 */
void ubifs_destroy_tnc_tree(struct ubifs_info *c)
{
	if (c->zroot.znode) {
		/* Sample the clean znode count before the subtree is destroyed. */
		long n = atomic_long_read(&c->clean_zn_cnt);
		long freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode);

		ubifs_assert(c, freed == n);
		atomic_long_sub(n, &ubifs_clean_zn_cnt);
		/* Mark the tree as gone so that a repeated call is a no-op. */
		c->zroot.znode = NULL;
	}
}
/**
* read_znode - read an indexing node from flash and fill znode.
* @c: UBIFS file-system description object
......
......@@ -1047,6 +1047,8 @@ struct ubifs_debug_info;
* @bg_bud_bytes: number of bud bytes when background commit is initiated
* @old_buds: buds to be released after commit ends
* @max_bud_cnt: maximum number of buds
* @need_wait_space: Non %0 means space reservation tasks need to wait in queue
* @reserve_space_wq: wait queue to sleep on if @need_wait_space is not %0
*
* @commit_sem: synchronizes committer with other processes
* @cmt_state: commit state
......@@ -1305,6 +1307,8 @@ struct ubifs_info {
long long bg_bud_bytes;
struct list_head old_buds;
int max_bud_cnt;
atomic_t need_wait_space;
wait_queue_head_t reserve_space_wq;
struct rw_semaphore commit_sem;
int cmt_state;
......@@ -1903,6 +1907,7 @@ struct ubifs_znode *ubifs_tnc_postorder_next(const struct ubifs_info *c,
struct ubifs_znode *znode);
long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
struct ubifs_znode *zr);
void ubifs_destroy_tnc_tree(struct ubifs_info *c);
struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
struct ubifs_zbranch *zbr,
struct ubifs_znode *parent, int iip);
......
......@@ -192,6 +192,7 @@ struct ubi_device_info {
* or a volume was removed)
* @UBI_VOLUME_RESIZED: a volume has been re-sized
* @UBI_VOLUME_RENAMED: a volume has been re-named
* @UBI_VOLUME_SHUTDOWN: a volume is going to be removed, shut down users
* @UBI_VOLUME_UPDATED: data has been written to a volume
*
* These constants define which type of event has happened when a volume
......@@ -202,6 +203,7 @@ enum {
UBI_VOLUME_REMOVED,
UBI_VOLUME_RESIZED,
UBI_VOLUME_RENAMED,
UBI_VOLUME_SHUTDOWN,
UBI_VOLUME_UPDATED,
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment